Add creation of unknown users with known handles (i.e., deleted accounts)
This commit is contained in:
parent
41586bf0f8
commit
772fa247f7
@ -187,7 +187,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
||||
}
|
||||
|
||||
u.IsContentDownloaded = true
|
||||
return p.SaveUser(*u)
|
||||
return p.SaveUser(u)
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -81,7 +81,7 @@ func TestDownloadUserContent(t *testing.T) {
|
||||
user := create_dummy_user()
|
||||
|
||||
// Persist the User
|
||||
err := profile.SaveUser(user)
|
||||
err := profile.SaveUser(&user)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to save the user: %s", err.Error())
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ create table users (rowid integer primary key,
|
||||
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
|
||||
|
||||
is_followed boolean default 0,
|
||||
is_id_fake boolean default 0,
|
||||
is_content_downloaded boolean default 0
|
||||
);
|
||||
|
||||
@ -151,3 +152,6 @@ create table hashtags (rowid integer primary key,
|
||||
create table database_version(rowid integer primary key,
|
||||
version_number integer not null unique
|
||||
);
|
||||
|
||||
create table fake_user_sequence(latest_fake_id integer not null);
|
||||
insert into fake_user_sequence values(0x4000000000000000);
|
||||
|
@ -18,7 +18,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
||||
panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
||||
}
|
||||
|
||||
err = p.SaveUser(u)
|
||||
err = p.SaveUser(&u)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
||||
}
|
||||
|
@ -14,12 +14,24 @@ import (
|
||||
* args:
|
||||
* - u: the User
|
||||
*/
|
||||
func (p Profile) SaveUser(u scraper.User) error {
|
||||
db := p.DB
|
||||
func (p Profile) SaveUser(u *scraper.User) error {
|
||||
if u.IsNeedingFakeID {
|
||||
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
|
||||
if err == sql.ErrNoRows {
|
||||
// We need to continue-- create a new fake user
|
||||
u.ID = p.NextFakeUserID()
|
||||
} else if err == nil {
|
||||
// We're done; everything is fine (ID has already been scanned into the User)
|
||||
return nil
|
||||
} else {
|
||||
// A real error occurred
|
||||
panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error()))
|
||||
}
|
||||
}
|
||||
|
||||
_, err := db.Exec(`
|
||||
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
_, err := p.DB.Exec(`
|
||||
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
on conflict do update
|
||||
set bio=?,
|
||||
display_name=?,
|
||||
@ -37,7 +49,7 @@ func (p Profile) SaveUser(u scraper.User) error {
|
||||
pinned_tweet_id=?,
|
||||
is_content_downloaded=(is_content_downloaded or ?)
|
||||
`,
|
||||
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
|
||||
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake,
|
||||
u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
|
||||
)
|
||||
if err != nil {
|
||||
@ -208,3 +220,16 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
|
||||
}
|
||||
user.IsFollowed = is_followed
|
||||
}
|
||||
|
||||
func (p Profile) NextFakeUserID() scraper.UserID {
|
||||
_, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var ret scraper.UserID
|
||||
err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
@ -3,11 +3,15 @@ package persistence_test
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/go-test/deep"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
|
||||
@ -21,7 +25,7 @@ func TestSaveAndLoadUser(t *testing.T) {
|
||||
fake_user := create_dummy_user()
|
||||
|
||||
// Save the user, then reload it and ensure it's the same
|
||||
err := profile.SaveUser(fake_user)
|
||||
err := profile.SaveUser(&fake_user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -64,7 +68,7 @@ func TestModifyUser(t *testing.T) {
|
||||
fake_user.IsContentDownloaded = true
|
||||
|
||||
// Save the user so it can be modified
|
||||
err := profile.SaveUser(fake_user)
|
||||
err := profile.SaveUser(&fake_user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -81,7 +85,7 @@ func TestModifyUser(t *testing.T) {
|
||||
fake_user.IsContentDownloaded = false // test No Worsening
|
||||
|
||||
// Save the modified user
|
||||
err = profile.SaveUser(fake_user)
|
||||
err = profile.SaveUser(&fake_user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -150,7 +154,7 @@ func TestUserExists(t *testing.T) {
|
||||
if exists {
|
||||
t.Errorf("It shouldn't exist, but it does: %d", user.ID)
|
||||
}
|
||||
err := profile.SaveUser(user)
|
||||
err := profile.SaveUser(&user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -178,7 +182,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
|
||||
user.BannerImageUrl = "banner url1"
|
||||
user.ProfileImageUrl = "profile url1"
|
||||
user.IsContentDownloaded = false
|
||||
err := profile.SaveUser(user)
|
||||
err := profile.SaveUser(&user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -190,7 +194,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
|
||||
|
||||
// Mark `is_content_downloaded` as "true" again
|
||||
user.IsContentDownloaded = true
|
||||
err = profile.SaveUser(user)
|
||||
err = profile.SaveUser(&user)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -225,7 +229,7 @@ func TestFollowUnfollowUser(t *testing.T) {
|
||||
|
||||
user := create_dummy_user()
|
||||
assert.False(user.IsFollowed)
|
||||
err := profile.SaveUser(user)
|
||||
err := profile.SaveUser(&user)
|
||||
assert.NoError(err)
|
||||
|
||||
profile.SetUserFollowed(&user, true)
|
||||
@ -237,7 +241,7 @@ func TestFollowUnfollowUser(t *testing.T) {
|
||||
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
||||
assert.True(user_reloaded.IsFollowed)
|
||||
|
||||
err = profile.SaveUser(user) // should NOT un-set is_followed
|
||||
err = profile.SaveUser(&user) // should NOT un-set is_followed
|
||||
assert.NoError(err)
|
||||
user_reloaded, err = profile.GetUserByHandle(user.Handle)
|
||||
require.NoError(t, err)
|
||||
@ -253,3 +257,62 @@ func TestFollowUnfollowUser(t *testing.T) {
|
||||
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
||||
assert.False(user_reloaded.IsFollowed)
|
||||
}
|
||||
|
||||
/**
|
||||
* Should create a new Unknown User from the given handle.
|
||||
* The Unknown User should work consistently with other Users.
|
||||
*/
|
||||
func TestCreateUnknownUserWithHandle(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
profile_path := "test_profiles/TestUserQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
next_id := profile.NextFakeUserID()
|
||||
|
||||
handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
|
||||
user := scraper.GetUnknownUserWithHandle(handle)
|
||||
assert.Equal(scraper.UserID(0), user.ID)
|
||||
assert.True(user.IsIdFake)
|
||||
|
||||
err := profile.SaveUser(&user)
|
||||
assert.NoError(err)
|
||||
assert.Equal(scraper.UserID(next_id + 1), user.ID)
|
||||
|
||||
// Ensure the change was persisted
|
||||
user_reloaded, err := profile.GetUserByHandle(user.Handle)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
|
||||
assert.Equal(scraper.UserID(next_id + 1), user_reloaded.ID)
|
||||
|
||||
// Why not tack this test on here: make sure NextFakeUserID works as expected
|
||||
assert.Equal(next_id + 2, profile.NextFakeUserID())
|
||||
}
|
||||
|
||||
/**
|
||||
* Should update the unknown User's UserID with the correct ID if it already exists
|
||||
*/
|
||||
func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
profile_path := "test_profiles/TestUserQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
user := create_stable_user()
|
||||
|
||||
|
||||
unknown_user := scraper.GetUnknownUserWithHandle(user.Handle)
|
||||
assert.Equal(scraper.UserID(0), unknown_user.ID)
|
||||
|
||||
err := profile.SaveUser(&unknown_user)
|
||||
assert.NoError(err)
|
||||
assert.Equal(user.ID, unknown_user.ID)
|
||||
|
||||
// The real user should not have been overwritten at all
|
||||
user_reloaded, err := profile.GetUserByID(user.ID)
|
||||
assert.NoError(err)
|
||||
assert.False(user_reloaded.IsIdFake) // This one particularly
|
||||
assert.Equal(user.Handle, user_reloaded.Handle)
|
||||
assert.Equal(user.Bio, user_reloaded.Bio)
|
||||
assert.Equal(user.DisplayName, user_reloaded.DisplayName)
|
||||
}
|
||||
|
@ -21,7 +21,8 @@ func create_or_load_profile(profile_path string) persistence.Profile {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = profile.SaveUser(create_stable_user())
|
||||
u := create_stable_user()
|
||||
err = profile.SaveUser(&u)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
|
||||
const ENGINE_DATABASE_VERSION = 9
|
||||
const ENGINE_DATABASE_VERSION = 10
|
||||
|
||||
|
||||
type VersionMismatchError struct {
|
||||
@ -62,6 +62,9 @@ var MIGRATIONS = []string{
|
||||
`alter table urls add column short_text text not null default ""`,
|
||||
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
||||
`alter table users add column is_followed boolean default 0`,
|
||||
`create table fake_user_sequence(latest_fake_id integer not null);
|
||||
insert into fake_user_sequence values(0x4000000000000000);
|
||||
alter table users add column is_id_fake boolean default 0;`,
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -46,6 +46,8 @@ type User struct {
|
||||
|
||||
IsFollowed bool
|
||||
IsContentDownloaded bool
|
||||
IsNeedingFakeID bool
|
||||
IsIdFake bool
|
||||
}
|
||||
|
||||
func (u User) String() string {
|
||||
@ -100,6 +102,27 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
||||
return UserHandle(matches[1]), nil
|
||||
}
|
||||
|
||||
func GetUnknownUserWithHandle(handle UserHandle) User {
|
||||
return User{
|
||||
ID: UserID(0), // 2^62 + 1...
|
||||
DisplayName: string(handle),
|
||||
Handle: handle,
|
||||
Bio: "<blank>",
|
||||
FollowersCount: 0,
|
||||
FollowingCount: 0,
|
||||
Location: "<blank>",
|
||||
Website:"<blank>",
|
||||
JoinDate: time.Unix(0, 0),
|
||||
IsVerified: false,
|
||||
IsPrivate: true,
|
||||
ProfileImageUrl: DEFAULT_PROFILE_IMAGE_URL,
|
||||
ProfileImageLocalPath: path.Base(DEFAULT_PROFILE_IMAGE_URL),
|
||||
BannerImageUrl: "",
|
||||
BannerImageLocalPath: "",
|
||||
IsNeedingFakeID: true,
|
||||
IsIdFake: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user