Add creation of unknown users with known handles (i.e., deleted accounts)

This commit is contained in:
Alessio 2022-02-26 22:09:27 -08:00
parent 41586bf0f8
commit 772fa247f7
9 changed files with 138 additions and 19 deletions

View File

@ -187,7 +187,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
} }
u.IsContentDownloaded = true u.IsContentDownloaded = true
return p.SaveUser(*u) return p.SaveUser(u)
} }
/** /**

View File

@ -81,7 +81,7 @@ func TestDownloadUserContent(t *testing.T) {
user := create_dummy_user() user := create_dummy_user()
// Persist the User // Persist the User
err := profile.SaveUser(user) err := profile.SaveUser(&user)
if err != nil { if err != nil {
t.Fatalf("Failed to save the user: %s", err.Error()) t.Fatalf("Failed to save the user: %s", err.Error())
} }

View File

@ -20,6 +20,7 @@ create table users (rowid integer primary key,
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''), pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
is_followed boolean default 0, is_followed boolean default 0,
is_id_fake boolean default 0,
is_content_downloaded boolean default 0 is_content_downloaded boolean default 0
); );
@ -151,3 +152,6 @@ create table hashtags (rowid integer primary key,
create table database_version(rowid integer primary key, create table database_version(rowid integer primary key,
version_number integer not null unique version_number integer not null unique
); );
create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);

View File

@ -18,7 +18,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error())) panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
} }
err = p.SaveUser(u) err = p.SaveUser(&u)
if err != nil { if err != nil {
panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error())) panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
} }

View File

@ -14,12 +14,24 @@ import (
* args: * args:
* - u: the User * - u: the User
*/ */
func (p Profile) SaveUser(u scraper.User) error { func (p Profile) SaveUser(u *scraper.User) error {
db := p.DB if u.IsNeedingFakeID {
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
if err == sql.ErrNoRows {
// We need to continue-- create a new fake user
u.ID = p.NextFakeUserID()
} else if err == nil {
// We're done; everything is fine (ID has already been scanned into the User)
return nil
} else {
// A real error occurred
panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error()))
}
}
_, err := db.Exec(` _, err := p.DB.Exec(`
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded) insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update on conflict do update
set bio=?, set bio=?,
display_name=?, display_name=?,
@ -37,7 +49,7 @@ func (p Profile) SaveUser(u scraper.User) error {
pinned_tweet_id=?, pinned_tweet_id=?,
is_content_downloaded=(is_content_downloaded or ?) is_content_downloaded=(is_content_downloaded or ?)
`, `,
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake,
u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
) )
if err != nil { if err != nil {
@ -208,3 +220,16 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
} }
user.IsFollowed = is_followed user.IsFollowed = is_followed
} }
/**
 * Advance the fake-user ID sequence and return the new value.
 *
 * Increments `latest_fake_id` in the `fake_user_sequence` table, then reads it back.
 * Panics if either statement fails.
 *
 * NOTE(review): the increment and the read are two separate statements; whether callers
 * can race between them depends on how `p.DB` is used elsewhere -- confirm.
 *
 * returns: the value of `latest_fake_id` after the increment
 */
func (p Profile) NextFakeUserID() scraper.UserID {
	if _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1"); err != nil {
		panic(err)
	}

	var next_id scraper.UserID
	if err := p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&next_id); err != nil {
		panic(err)
	}
	return next_id
}

View File

@ -3,11 +3,15 @@ package persistence_test
import ( import (
"testing" "testing"
"time" "time"
"fmt"
"math/rand"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/go-test/deep" "github.com/go-test/deep"
"offline_twitter/scraper"
) )
@ -21,7 +25,7 @@ func TestSaveAndLoadUser(t *testing.T) {
fake_user := create_dummy_user() fake_user := create_dummy_user()
// Save the user, then reload it and ensure it's the same // Save the user, then reload it and ensure it's the same
err := profile.SaveUser(fake_user) err := profile.SaveUser(&fake_user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -64,7 +68,7 @@ func TestModifyUser(t *testing.T) {
fake_user.IsContentDownloaded = true fake_user.IsContentDownloaded = true
// Save the user so it can be modified // Save the user so it can be modified
err := profile.SaveUser(fake_user) err := profile.SaveUser(&fake_user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -81,7 +85,7 @@ func TestModifyUser(t *testing.T) {
fake_user.IsContentDownloaded = false // test No Worsening fake_user.IsContentDownloaded = false // test No Worsening
// Save the modified user // Save the modified user
err = profile.SaveUser(fake_user) err = profile.SaveUser(&fake_user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -150,7 +154,7 @@ func TestUserExists(t *testing.T) {
if exists { if exists {
t.Errorf("It shouldn't exist, but it does: %d", user.ID) t.Errorf("It shouldn't exist, but it does: %d", user.ID)
} }
err := profile.SaveUser(user) err := profile.SaveUser(&user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -178,7 +182,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
user.BannerImageUrl = "banner url1" user.BannerImageUrl = "banner url1"
user.ProfileImageUrl = "profile url1" user.ProfileImageUrl = "profile url1"
user.IsContentDownloaded = false user.IsContentDownloaded = false
err := profile.SaveUser(user) err := profile.SaveUser(&user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -190,7 +194,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
// Mark `is_content_downloaded` as "true" again // Mark `is_content_downloaded` as "true" again
user.IsContentDownloaded = true user.IsContentDownloaded = true
err = profile.SaveUser(user) err = profile.SaveUser(&user)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -225,7 +229,7 @@ func TestFollowUnfollowUser(t *testing.T) {
user := create_dummy_user() user := create_dummy_user()
assert.False(user.IsFollowed) assert.False(user.IsFollowed)
err := profile.SaveUser(user) err := profile.SaveUser(&user)
assert.NoError(err) assert.NoError(err)
profile.SetUserFollowed(&user, true) profile.SetUserFollowed(&user, true)
@ -237,7 +241,7 @@ func TestFollowUnfollowUser(t *testing.T) {
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
assert.True(user_reloaded.IsFollowed) assert.True(user_reloaded.IsFollowed)
err = profile.SaveUser(user) // should NOT un-set is_followed err = profile.SaveUser(&user) // should NOT un-set is_followed
assert.NoError(err) assert.NoError(err)
user_reloaded, err = profile.GetUserByHandle(user.Handle) user_reloaded, err = profile.GetUserByHandle(user.Handle)
require.NoError(t, err) require.NoError(t, err)
@ -253,3 +257,62 @@ func TestFollowUnfollowUser(t *testing.T) {
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
assert.False(user_reloaded.IsFollowed) assert.False(user_reloaded.IsFollowed)
} }
/**
 * Saving an Unknown User built from only a handle should assign it the next fake ID,
 * and the saved user should then be loadable by handle with that same ID.
 */
func TestCreateUnknownUserWithHandle(t *testing.T) {
	assert := assert.New(t)
	profile := create_or_load_profile("test_profiles/TestUserQueries")

	// Take one ID from the sequence; the user saved below should receive the one after it
	id_before := profile.NextFakeUserID()
	expected_id := scraper.UserID(id_before + 1)

	// Random suffix on the handle, so the lookup-by-handle in SaveUser should not find an existing row
	random_handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
	unknown_user := scraper.GetUnknownUserWithHandle(random_handle)
	assert.Equal(scraper.UserID(0), unknown_user.ID)
	assert.True(unknown_user.IsIdFake)

	// Saving should write the newly assigned ID back into the struct
	save_err := profile.SaveUser(&unknown_user)
	assert.NoError(save_err)
	assert.Equal(expected_id, unknown_user.ID)

	// Ensure the change was persisted
	reloaded, load_err := profile.GetUserByHandle(unknown_user.Handle)
	require.NoError(t, load_err)
	assert.Equal(random_handle, reloaded.Handle) // Verify it's the same user
	assert.Equal(expected_id, reloaded.ID)

	// The sequence should keep counting from where the save left it
	assert.Equal(id_before+2, profile.NextFakeUserID())
}
/**
 * Saving an Unknown User whose handle is already in the database should fill in the
 * existing user's ID on the struct, and should leave the existing user's row alone.
 */
func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {
	assert := assert.New(t)
	profile := create_or_load_profile("test_profiles/TestUserQueries")

	// The stable user is expected to already be in the profile; reuse its handle
	existing_user := create_stable_user()
	placeholder := scraper.GetUnknownUserWithHandle(existing_user.Handle)
	assert.Equal(scraper.UserID(0), placeholder.ID)

	// Saving should resolve the placeholder to the real user's ID
	save_err := profile.SaveUser(&placeholder)
	assert.NoError(save_err)
	assert.Equal(existing_user.ID, placeholder.ID)

	// The real user should not have been overwritten at all
	reloaded, load_err := profile.GetUserByID(existing_user.ID)
	assert.NoError(load_err)
	assert.False(reloaded.IsIdFake) // This one particularly
	assert.Equal(existing_user.Handle, reloaded.Handle)
	assert.Equal(existing_user.Bio, reloaded.Bio)
	assert.Equal(existing_user.DisplayName, reloaded.DisplayName)
}

View File

@ -21,7 +21,8 @@ func create_or_load_profile(profile_path string) persistence.Profile {
if err != nil { if err != nil {
panic(err) panic(err)
} }
err = profile.SaveUser(create_stable_user()) u := create_stable_user()
err = profile.SaveUser(&u)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -8,7 +8,7 @@ import (
) )
const ENGINE_DATABASE_VERSION = 9 const ENGINE_DATABASE_VERSION = 10
type VersionMismatchError struct { type VersionMismatchError struct {
@ -62,6 +62,9 @@ var MIGRATIONS = []string{
`alter table urls add column short_text text not null default ""`, `alter table urls add column short_text text not null default ""`,
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, youll need to log in to Twitter')`, `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, youll need to log in to Twitter')`,
`alter table users add column is_followed boolean default 0`, `alter table users add column is_followed boolean default 0`,
`create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);
alter table users add column is_id_fake boolean default 0;`,
} }
/** /**

View File

@ -46,6 +46,8 @@ type User struct {
IsFollowed bool IsFollowed bool
IsContentDownloaded bool IsContentDownloaded bool
IsNeedingFakeID bool
IsIdFake bool
} }
func (u User) String() string { func (u User) String() string {
@ -100,6 +102,27 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
return UserHandle(matches[1]), nil return UserHandle(matches[1]), nil
} }
// Build a placeholder User for an account that is known only by its handle (e.g., a deleted account).
//
// The returned User has ID 0 and is flagged with both `IsNeedingFakeID` and `IsIdFake`.
// Text fields are filled with "<blank>", the display name is the handle itself, and the
// profile image is the default one.
func GetUnknownUserWithHandle(handle UserHandle) User {
	const blank = "<blank>"

	var unknown User

	// Identity: no real ID is known yet, so it stays 0 and is marked as needing a fake one
	unknown.ID = UserID(0)
	unknown.Handle = handle
	unknown.DisplayName = string(handle)
	unknown.IsNeedingFakeID = true
	unknown.IsIdFake = true

	// Profile text and counters
	unknown.Bio = blank
	unknown.Location = blank
	unknown.Website = blank
	unknown.FollowersCount = 0
	unknown.FollowingCount = 0
	unknown.JoinDate = time.Unix(0, 0) // Unix epoch
	unknown.IsVerified = false
	unknown.IsPrivate = true

	// Images: default profile image, no banner
	unknown.ProfileImageUrl = DEFAULT_PROFILE_IMAGE_URL
	unknown.ProfileImageLocalPath = path.Base(DEFAULT_PROFILE_IMAGE_URL)
	unknown.BannerImageUrl = ""
	unknown.BannerImageLocalPath = ""

	return unknown
}
// Turn an APIUser, as returned from the scraper, into a properly structured User object // Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) { func ParseSingleUser(apiUser APIUser) (ret User, err error) {