diff --git a/persistence/media_download.go b/persistence/media_download.go index e0395bf..5c92b82 100644 --- a/persistence/media_download.go +++ b/persistence/media_download.go @@ -187,7 +187,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med } u.IsContentDownloaded = true - return p.SaveUser(*u) + return p.SaveUser(u) } /** diff --git a/persistence/media_download_test.go b/persistence/media_download_test.go index 6ed7624..005ad8d 100644 --- a/persistence/media_download_test.go +++ b/persistence/media_download_test.go @@ -81,7 +81,7 @@ func TestDownloadUserContent(t *testing.T) { user := create_dummy_user() // Persist the User - err := profile.SaveUser(user) + err := profile.SaveUser(&user) if err != nil { t.Fatalf("Failed to save the user: %s", err.Error()) } diff --git a/persistence/schema.sql b/persistence/schema.sql index 130f76f..4e7ba84 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -20,6 +20,7 @@ create table users (rowid integer primary key, pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''), is_followed boolean default 0, + is_id_fake boolean default 0, is_content_downloaded boolean default 0 ); @@ -151,3 +152,6 @@ create table hashtags (rowid integer primary key, create table database_version(rowid integer primary key, version_number integer not null unique ); + +create table fake_user_sequence(latest_fake_id integer not null); +insert into fake_user_sequence values(0x4000000000000000); diff --git a/persistence/tweet_trove_queries.go b/persistence/tweet_trove_queries.go index a770e10..5d53ace 100644 --- a/persistence/tweet_trove_queries.go +++ b/persistence/tweet_trove_queries.go @@ -18,7 +18,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) { panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error())) } - err = p.SaveUser(u) + err = p.SaveUser(&u) if err != nil { panic(fmt.Sprintf("Error saving 
user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error())) } diff --git a/persistence/user_queries.go b/persistence/user_queries.go index e81b372..bd383ec 100644 --- a/persistence/user_queries.go +++ b/persistence/user_queries.go @@ -14,12 +14,24 @@ import ( * args: * - u: the User */ -func (p Profile) SaveUser(u scraper.User) error { - db := p.DB +func (p Profile) SaveUser(u *scraper.User) error { + if u.IsNeedingFakeID { + err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID) + if err == sql.ErrNoRows { + // We need to continue-- create a new fake user + u.ID = p.NextFakeUserID() + } else if err == nil { + // We're done; everything is fine (ID has already been scanned into the User) + return nil + } else { + // A real error occurred + panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error())) + } + } - _, err := db.Exec(` - insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded) - values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + _, err := p.DB.Exec(` + insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set bio=?, display_name=?, @@ -37,7 +49,7 @@ func (p Profile) SaveUser(u scraper.User) error { pinned_tweet_id=?, is_content_downloaded=(is_content_downloaded or ?) 
`, - u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, + u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake, u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, ) if err != nil { @@ -208,3 +220,16 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) { } user.IsFollowed = is_followed } + +func (p Profile) NextFakeUserID() scraper.UserID { + _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1") + if err != nil { + panic(err) + } + var ret scraper.UserID + err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret) + if err != nil { + panic(err) + } + return ret +} diff --git a/persistence/user_queries_test.go b/persistence/user_queries_test.go index 29baa31..de0d072 100644 --- a/persistence/user_queries_test.go +++ b/persistence/user_queries_test.go @@ -3,11 +3,15 @@ package persistence_test import ( "testing" "time" + "fmt" + "math/rand" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/go-test/deep" + + "offline_twitter/scraper" ) @@ -21,7 +25,7 @@ func TestSaveAndLoadUser(t *testing.T) { fake_user := create_dummy_user() // Save the user, then reload it and ensure it's the same - err := profile.SaveUser(fake_user) + err := profile.SaveUser(&fake_user) if err != nil { panic(err) } @@ -64,7 +68,7 @@ 
func TestModifyUser(t *testing.T) { fake_user.IsContentDownloaded = true // Save the user so it can be modified - err := profile.SaveUser(fake_user) + err := profile.SaveUser(&fake_user) if err != nil { panic(err) } @@ -81,7 +85,7 @@ func TestModifyUser(t *testing.T) { fake_user.IsContentDownloaded = false // test No Worsening // Save the modified user - err = profile.SaveUser(fake_user) + err = profile.SaveUser(&fake_user) if err != nil { panic(err) } @@ -150,7 +154,7 @@ func TestUserExists(t *testing.T) { if exists { t.Errorf("It shouldn't exist, but it does: %d", user.ID) } - err := profile.SaveUser(user) + err := profile.SaveUser(&user) if err != nil { panic(err) } @@ -178,7 +182,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) { user.BannerImageUrl = "banner url1" user.ProfileImageUrl = "profile url1" user.IsContentDownloaded = false - err := profile.SaveUser(user) + err := profile.SaveUser(&user) if err != nil { panic(err) } @@ -190,7 +194,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) { // Mark `is_content_downloaded` as "true" again user.IsContentDownloaded = true - err = profile.SaveUser(user) + err = profile.SaveUser(&user) if err != nil { panic(err) } @@ -225,7 +229,7 @@ func TestFollowUnfollowUser(t *testing.T) { user := create_dummy_user() assert.False(user.IsFollowed) - err := profile.SaveUser(user) + err := profile.SaveUser(&user) assert.NoError(err) profile.SetUserFollowed(&user, true) @@ -237,7 +241,7 @@ func TestFollowUnfollowUser(t *testing.T) { assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user assert.True(user_reloaded.IsFollowed) - err = profile.SaveUser(user) // should NOT un-set is_followed + err = profile.SaveUser(&user) // should NOT un-set is_followed assert.NoError(err) user_reloaded, err = profile.GetUserByHandle(user.Handle) require.NoError(t, err) @@ -253,3 +257,62 @@ func TestFollowUnfollowUser(t *testing.T) { assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user 
assert.False(user_reloaded.IsFollowed) } + +/** + * Should create a new Unknown User from the given handle. + * The Unknown User should work consistently with other Users. + */ +func TestCreateUnknownUserWithHandle(t *testing.T) { + assert := assert.New(t) + + profile_path := "test_profiles/TestUserQueries" + profile := create_or_load_profile(profile_path) + + next_id := profile.NextFakeUserID() + + handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31())) + user := scraper.GetUnknownUserWithHandle(handle) + assert.Equal(scraper.UserID(0), user.ID) + assert.True(user.IsIdFake) + + err := profile.SaveUser(&user) + assert.NoError(err) + assert.Equal(scraper.UserID(next_id + 1), user.ID) + + // Ensure the change was persisted + user_reloaded, err := profile.GetUserByHandle(user.Handle) + require.NoError(t, err) + assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user + assert.Equal(scraper.UserID(next_id + 1), user_reloaded.ID) + + // Why not tack this test on here: make sure NextFakeUserID works as expected + assert.Equal(next_id + 2, profile.NextFakeUserID()) +} + +/** + * Should update the unknown User's UserID with the correct ID if it already exists + */ +func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) { + assert := assert.New(t) + + profile_path := "test_profiles/TestUserQueries" + profile := create_or_load_profile(profile_path) + + user := create_stable_user() + + + unknown_user := scraper.GetUnknownUserWithHandle(user.Handle) + assert.Equal(scraper.UserID(0), unknown_user.ID) + + err := profile.SaveUser(&unknown_user) + assert.NoError(err) + assert.Equal(user.ID, unknown_user.ID) + + // The real user should not have been overwritten at all + user_reloaded, err := profile.GetUserByID(user.ID) + assert.NoError(err) + assert.False(user_reloaded.IsIdFake) // This one particularly + assert.Equal(user.Handle, user_reloaded.Handle) + assert.Equal(user.Bio, user_reloaded.Bio) + assert.Equal(user.DisplayName, 
user_reloaded.DisplayName) +} diff --git a/persistence/utils_test.go b/persistence/utils_test.go index fcf1c4f..8229ed4 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -21,7 +21,8 @@ func create_or_load_profile(profile_path string) persistence.Profile { if err != nil { panic(err) } - err = profile.SaveUser(create_stable_user()) + u := create_stable_user() + err = profile.SaveUser(&u) if err != nil { panic(err) } diff --git a/persistence/versions.go b/persistence/versions.go index 49ac958..712cbdf 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -8,7 +8,7 @@ import ( ) -const ENGINE_DATABASE_VERSION = 9 +const ENGINE_DATABASE_VERSION = 10 type VersionMismatchError struct { @@ -62,6 +62,9 @@ var MIGRATIONS = []string{ `alter table urls add column short_text text not null default ""`, `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`, `alter table users add column is_followed boolean default 0`, +`create table fake_user_sequence(latest_fake_id integer not null); + insert into fake_user_sequence values(0x4000000000000000); + alter table users add column is_id_fake boolean default 0;`, } /** diff --git a/scraper/user.go b/scraper/user.go index 33ac599..0d43fb9 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -46,6 +46,8 @@ type User struct { IsFollowed bool IsContentDownloaded bool + IsNeedingFakeID bool + IsIdFake bool } func (u User) String() string { @@ -100,6 +102,27 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) { return UserHandle(matches[1]), nil } +func GetUnknownUserWithHandle(handle UserHandle) User { + return User{ + ID: UserID(0), // placeholder; SaveUser swaps in the existing user's ID or a fresh fake ID (2^62 + n)
+ DisplayName: string(handle), + Handle: handle, + Bio: "", + FollowersCount: 0, + FollowingCount: 0, + Location: "", + Website:"", + JoinDate: time.Unix(0, 0), + IsVerified: false, + IsPrivate: true, + ProfileImageUrl: DEFAULT_PROFILE_IMAGE_URL, + ProfileImageLocalPath: path.Base(DEFAULT_PROFILE_IMAGE_URL), + BannerImageUrl: "", + BannerImageLocalPath: "", + IsNeedingFakeID: true, + IsIdFake: true, + } +} // Turn an APIUser, as returned from the scraper, into a properly structured User object func ParseSingleUser(apiUser APIUser) (ret User, err error) {