Add creation of unknown users with known handles (i.e., deleted accounts)

This commit is contained in:
Alessio 2022-02-26 22:09:27 -08:00
parent 41586bf0f8
commit 772fa247f7
9 changed files with 138 additions and 19 deletions

View File

@ -187,7 +187,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
}
u.IsContentDownloaded = true
return p.SaveUser(*u)
return p.SaveUser(u)
}
/**

View File

@ -81,7 +81,7 @@ func TestDownloadUserContent(t *testing.T) {
user := create_dummy_user()
// Persist the User
err := profile.SaveUser(user)
err := profile.SaveUser(&user)
if err != nil {
t.Fatalf("Failed to save the user: %s", err.Error())
}

View File

@ -20,6 +20,7 @@ create table users (rowid integer primary key,
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
is_followed boolean default 0,
is_id_fake boolean default 0,
is_content_downloaded boolean default 0
);
@ -151,3 +152,6 @@ create table hashtags (rowid integer primary key,
create table database_version(rowid integer primary key,
version_number integer not null unique
);
-- Counter table for fake user IDs: `latest_fake_id` holds the most recently issued value.
-- It is seeded with a single row containing 0x4000000000000000 (= 2^62).
create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);

View File

@ -18,7 +18,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
}
err = p.SaveUser(u)
err = p.SaveUser(&u)
if err != nil {
panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
}

View File

@ -14,12 +14,24 @@ import (
* args:
* - u: the User
*/
func (p Profile) SaveUser(u scraper.User) error {
db := p.DB
func (p Profile) SaveUser(u *scraper.User) error {
if u.IsNeedingFakeID {
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
if err == sql.ErrNoRows {
// We need to continue-- create a new fake user
u.ID = p.NextFakeUserID()
} else if err == nil {
// We're done; everything is fine (ID has already been scanned into the User)
return nil
} else {
// A real error occurred
panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error()))
}
}
_, err := db.Exec(`
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
_, err := p.DB.Exec(`
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update
set bio=?,
display_name=?,
@ -37,7 +49,7 @@ func (p Profile) SaveUser(u scraper.User) error {
pinned_tweet_id=?,
is_content_downloaded=(is_content_downloaded or ?)
`,
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake,
u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
)
if err != nil {
@ -208,3 +220,16 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
}
user.IsFollowed = is_followed
}
/**
 * Reserve the next fake user ID and return it.
 *
 * Bumps `latest_fake_id` in the `fake_user_sequence` table by one, then reads the
 * new value back.  Panics if either statement fails.
 *
 * NOTE(review): the increment and the read-back are two separate statements and are
 * not wrapped in a transaction here -- confirm callers never run this concurrently.
 *
 * returns: the value of `latest_fake_id` after the increment
 */
func (p Profile) NextFakeUserID() scraper.UserID {
	if _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1"); err != nil {
		panic(err)
	}

	var latest_id scraper.UserID
	row := p.DB.QueryRow("select latest_fake_id from fake_user_sequence")
	if err := row.Scan(&latest_id); err != nil {
		panic(err)
	}
	return latest_id
}

View File

@ -3,11 +3,15 @@ package persistence_test
import (
"testing"
"time"
"fmt"
"math/rand"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/go-test/deep"
"offline_twitter/scraper"
)
@ -21,7 +25,7 @@ func TestSaveAndLoadUser(t *testing.T) {
fake_user := create_dummy_user()
// Save the user, then reload it and ensure it's the same
err := profile.SaveUser(fake_user)
err := profile.SaveUser(&fake_user)
if err != nil {
panic(err)
}
@ -64,7 +68,7 @@ func TestModifyUser(t *testing.T) {
fake_user.IsContentDownloaded = true
// Save the user so it can be modified
err := profile.SaveUser(fake_user)
err := profile.SaveUser(&fake_user)
if err != nil {
panic(err)
}
@ -81,7 +85,7 @@ func TestModifyUser(t *testing.T) {
fake_user.IsContentDownloaded = false // test No Worsening
// Save the modified user
err = profile.SaveUser(fake_user)
err = profile.SaveUser(&fake_user)
if err != nil {
panic(err)
}
@ -150,7 +154,7 @@ func TestUserExists(t *testing.T) {
if exists {
t.Errorf("It shouldn't exist, but it does: %d", user.ID)
}
err := profile.SaveUser(user)
err := profile.SaveUser(&user)
if err != nil {
panic(err)
}
@ -178,7 +182,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
user.BannerImageUrl = "banner url1"
user.ProfileImageUrl = "profile url1"
user.IsContentDownloaded = false
err := profile.SaveUser(user)
err := profile.SaveUser(&user)
if err != nil {
panic(err)
}
@ -190,7 +194,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
// Mark `is_content_downloaded` as "true" again
user.IsContentDownloaded = true
err = profile.SaveUser(user)
err = profile.SaveUser(&user)
if err != nil {
panic(err)
}
@ -225,7 +229,7 @@ func TestFollowUnfollowUser(t *testing.T) {
user := create_dummy_user()
assert.False(user.IsFollowed)
err := profile.SaveUser(user)
err := profile.SaveUser(&user)
assert.NoError(err)
profile.SetUserFollowed(&user, true)
@ -237,7 +241,7 @@ func TestFollowUnfollowUser(t *testing.T) {
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
assert.True(user_reloaded.IsFollowed)
err = profile.SaveUser(user) // should NOT un-set is_followed
err = profile.SaveUser(&user) // should NOT un-set is_followed
assert.NoError(err)
user_reloaded, err = profile.GetUserByHandle(user.Handle)
require.NoError(t, err)
@ -253,3 +257,62 @@ func TestFollowUnfollowUser(t *testing.T) {
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
assert.False(user_reloaded.IsFollowed)
}
/**
 * Saving an Unknown User (built from just a handle) should assign it the next fake
 * ID from the sequence, and the saved user should be loadable by handle afterwards.
 */
func TestCreateUnknownUserWithHandle(t *testing.T) {
	assert := assert.New(t)
	profile := create_or_load_profile("test_profiles/TestUserQueries")

	// Consume one ID from the sequence so the following ones are predictable
	baseline_id := profile.NextFakeUserID()
	expected_id := baseline_id + 1

	// Randomized handle, so that it doesn't match the stable user's handle
	handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
	user := scraper.GetUnknownUserWithHandle(handle)

	// Before saving, the user has no ID yet but is already flagged as fake
	assert.Equal(scraper.UserID(0), user.ID)
	assert.True(user.IsIdFake)

	// Saving should fill in the ID on the struct we passed in
	assert.NoError(profile.SaveUser(&user))
	assert.Equal(expected_id, user.ID)

	// Ensure the change was persisted
	user_reloaded, err := profile.GetUserByHandle(user.Handle)
	require.NoError(t, err)
	assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
	assert.Equal(expected_id, user_reloaded.ID)

	// Saving took exactly one ID from the sequence, so the next one is baseline + 2
	assert.Equal(baseline_id+2, profile.NextFakeUserID())
}
/**
 * Saving an Unknown User whose handle belongs to an already-saved user should fill in
 * that user's real ID on the Unknown User, and leave the stored record untouched.
 */
func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {
	assert := assert.New(t)
	profile := create_or_load_profile("test_profiles/TestUserQueries")

	// Build an Unknown User that shares the stable user's handle
	real_user := create_stable_user()
	placeholder := scraper.GetUnknownUserWithHandle(real_user.Handle)
	assert.Equal(scraper.UserID(0), placeholder.ID)

	// Saving should resolve the placeholder to the real user's ID
	assert.NoError(profile.SaveUser(&placeholder))
	assert.Equal(real_user.ID, placeholder.ID)

	// The real user should not have been overwritten at all
	real_user_reloaded, err := profile.GetUserByID(real_user.ID)
	assert.NoError(err)
	assert.False(real_user_reloaded.IsIdFake) // This one particularly
	assert.Equal(real_user.Handle, real_user_reloaded.Handle)
	assert.Equal(real_user.Bio, real_user_reloaded.Bio)
	assert.Equal(real_user.DisplayName, real_user_reloaded.DisplayName)
}

View File

@ -21,7 +21,8 @@ func create_or_load_profile(profile_path string) persistence.Profile {
if err != nil {
panic(err)
}
err = profile.SaveUser(create_stable_user())
u := create_stable_user()
err = profile.SaveUser(&u)
if err != nil {
panic(err)
}

View File

@ -8,7 +8,7 @@ import (
)
const ENGINE_DATABASE_VERSION = 9
const ENGINE_DATABASE_VERSION = 10
type VersionMismatchError struct {
@ -62,6 +62,9 @@ var MIGRATIONS = []string{
`alter table urls add column short_text text not null default ""`,
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, youll need to log in to Twitter')`,
`alter table users add column is_followed boolean default 0`,
`create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);
alter table users add column is_id_fake boolean default 0;`,
}
/**

View File

@ -46,6 +46,8 @@ type User struct {
IsFollowed bool
IsContentDownloaded bool
IsNeedingFakeID bool
IsIdFake bool
}
func (u User) String() string {
@ -100,6 +102,27 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
return UserHandle(matches[1]), nil
}
// Build a placeholder User for an account of which only the handle is known.
//
// The returned User has ID 0, with both `IsNeedingFakeID` and `IsIdFake` set; no ID is
// assigned in this function.  The handle doubles as the display name, the text fields
// are filled with "<blank>", the profile image is the default one, and the account is
// marked private.
func GetUnknownUserWithHandle(handle UserHandle) User {
	var unknown User

	// Identity: the ID is left at zero here
	unknown.ID = UserID(0)
	unknown.Handle = handle
	unknown.DisplayName = string(handle)
	unknown.IsNeedingFakeID = true
	unknown.IsIdFake = true

	// Profile text: nothing is known, so use placeholders
	unknown.Bio = "<blank>"
	unknown.Location = "<blank>"
	unknown.Website = "<blank>"
	unknown.JoinDate = time.Unix(0, 0)

	// Counts and flags
	unknown.FollowersCount = 0
	unknown.FollowingCount = 0
	unknown.IsVerified = false
	unknown.IsPrivate = true

	// Images: default profile picture, no banner
	unknown.ProfileImageUrl = DEFAULT_PROFILE_IMAGE_URL
	unknown.ProfileImageLocalPath = path.Base(DEFAULT_PROFILE_IMAGE_URL)
	unknown.BannerImageUrl = ""
	unknown.BannerImageLocalPath = ""

	return unknown
}
// Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) {