package persistence import ( "database/sql" "errors" "fmt" "path" "offline_twitter/scraper" ) /** * Save the given User to the database. * If the User is already in the database, it will update most of its attributes (follower count, etc) * * args: * - u: the User */ func (p Profile) SaveUser(u *scraper.User) error { if u.IsNeedingFakeID { err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID) if errors.Is(err, sql.ErrNoRows) { // We need to continue-- create a new fake user u.ID = p.NextFakeUserID() } else if err == nil { // We're done; a user exists with this handle already. No need to fake anything, and we have no new data // to provide (since the ID is fake). // ID has already been scanned into the User, for use by the caller. return nil } else { // A real error occurred panic(fmt.Errorf("Error checking for existence of fake user with handle %q:\n %w", u.Handle, err)) } } _, err := p.DB.Exec(` insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set handle=?, bio=?, display_name=?, following_count=?, followers_count=?, location=?, website=?, is_private=?, is_verified=?, is_banned=?, profile_image_url=?, profile_image_local_path=?, banner_image_url=?, banner_image_local_path=?, pinned_tweet_id=?, is_content_downloaded=(is_content_downloaded or ?) `, u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake, u.Handle, u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, ) if err != nil { return fmt.Errorf("Error executing SaveUser(%s):\n %w", u.Handle, err) } return nil } /** * Check if the database has a User with the given user handle. * * args: * - handle: the user handle to search for * * returns: * - true if there is such a User in the database, false otherwise */ func (p Profile) UserExists(handle scraper.UserHandle) bool { db := p.DB var dummy string err := db.QueryRow("select 1 from users where lower(handle) = lower(?)", handle).Scan(&dummy) if err != nil { if !errors.Is(err, sql.ErrNoRows) { // A real error panic(err) } return false } return true } /** * Retrieve a User from the database, by handle. * * args: * - handle: the user handle to search for * * returns: * - the User, if it exists */ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error) { db := p.DB var ret scraper.User err := db.Get(&ret, ` select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_followed from users where lower(handle) = lower(?) `, handle) if errors.Is(err, sql.ErrNoRows) { return ret, ErrNotInDatabase{"User", handle} } return ret, nil } /** * Retrieve a User from the database, by user ID. * * args: * - id: the user ID to search for * * returns: * - the User, if it exists */ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) { db := p.DB var ret scraper.User err := db.Get(&ret, ` select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_followed from users where id = ? `, id) if errors.Is(err, sql.ErrNoRows) { return ret, ErrNotInDatabase{"User", id} } if err != nil { panic(err) } return ret, nil } /** * Returns `true` if content download is needed, `false` otherwise * * If the user is banned, returns false because downloading will be impossible. * * If: * - the user isn't in the DB at all (first time scraping), OR * - `is_content_downloaded` is false in the DB, OR * - the banner / profile image URL has changed from what the DB has * then it needs to be downloaded. * * The `user` object will always have `is_content_downloaded` = false on every scrape. This is * why the No Worsening Principle is needed. */ func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool { row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID) var is_content_downloaded bool var profile_image_url string var banner_image_url string err := row.Scan(&is_content_downloaded, &profile_image_url, &banner_image_url) if err != nil { if errors.Is(err, sql.ErrNoRows) { return true } else { panic(err) } } if !is_content_downloaded { return true } banner_path := p.get_banner_image_output_path(user) if banner_path != "" && !file_exists(banner_path) { return true } profile_path := p.get_profile_image_output_path(user) return !file_exists(profile_path) } /** * Follow / unfollow a user. Update the given User object's IsFollowed field. */ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) { result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID) if err != nil { panic(fmt.Errorf("Error inserting user with handle %q:\n %w", user.Handle, err)) } count, err := result.RowsAffected() if err != nil { panic(fmt.Errorf("Unknown error retrieving row count:\n %w", err)) } if count != 1 { panic(fmt.Errorf("User with handle %q not found", user.Handle)) } user.IsFollowed = is_followed } func (p Profile) NextFakeUserID() scraper.UserID { _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1") if err != nil { panic(err) } var ret scraper.UserID err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret) if err != nil { panic(err) } return ret } func (p Profile) GetAllFollowedUsers() []scraper.UserHandle { rows, err := p.DB.Query("select handle from users where is_followed = 1") if err != nil { panic(err) } ret := []scraper.UserHandle{} var tmp scraper.UserHandle for rows.Next() { err = rows.Scan(&tmp) if err != nil { panic(err) } ret = append(ret, tmp) } return ret } func (p Profile) IsFollowing(user scraper.User) bool { row := p.DB.QueryRow("select is_followed from users where id like ?", user.ID) var ret bool err := row.Scan(&ret) if err != nil { if errors.Is(err, sql.ErrNoRows) { return false } panic(err) // A real error } return ret } /** * Utility function to compute the path to save banner image to */ func (p Profile) get_banner_image_output_path(u scraper.User) string { return path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath) } /** * Utility function to compute the path to save profile image to */ func (p Profile) get_profile_image_output_path(u scraper.User) string { if u.ProfileImageUrl == "" { return path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL)) } return path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath) }