Add creation of unknown users with known handles (i.e., deleted accounts)
This commit is contained in:
parent
41586bf0f8
commit
772fa247f7
@ -187,7 +187,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
|||||||
}
|
}
|
||||||
|
|
||||||
u.IsContentDownloaded = true
|
u.IsContentDownloaded = true
|
||||||
return p.SaveUser(*u)
|
return p.SaveUser(u)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -81,7 +81,7 @@ func TestDownloadUserContent(t *testing.T) {
|
|||||||
user := create_dummy_user()
|
user := create_dummy_user()
|
||||||
|
|
||||||
// Persist the User
|
// Persist the User
|
||||||
err := profile.SaveUser(user)
|
err := profile.SaveUser(&user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to save the user: %s", err.Error())
|
t.Fatalf("Failed to save the user: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ create table users (rowid integer primary key,
|
|||||||
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
|
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
|
||||||
|
|
||||||
is_followed boolean default 0,
|
is_followed boolean default 0,
|
||||||
|
is_id_fake boolean default 0,
|
||||||
is_content_downloaded boolean default 0
|
is_content_downloaded boolean default 0
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -151,3 +152,6 @@ create table hashtags (rowid integer primary key,
|
|||||||
create table database_version(rowid integer primary key,
|
create table database_version(rowid integer primary key,
|
||||||
version_number integer not null unique
|
version_number integer not null unique
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- Counter table for IDs handed out to users whose real ID is unknown.
-- Seeded with one row holding 0x4000000000000000 (2^62); Profile.NextFakeUserID
-- increments `latest_fake_id` and returns the new value.
create table fake_user_sequence(latest_fake_id integer not null);
|
||||||
|
insert into fake_user_sequence values(0x4000000000000000);
|
||||||
|
@ -18,7 +18,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
|||||||
panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
panic(fmt.Sprintf("Error downloading user content for user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
||||||
}
|
}
|
||||||
|
|
||||||
err = p.SaveUser(u)
|
err = p.SaveUser(&u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
panic(fmt.Sprintf("Error saving user with ID %d and handle %s: %s", u.ID, u.Handle, err.Error()))
|
||||||
}
|
}
|
||||||
|
@ -14,12 +14,24 @@ import (
|
|||||||
* args:
|
* args:
|
||||||
* - u: the User
|
* - u: the User
|
||||||
*/
|
*/
|
||||||
func (p Profile) SaveUser(u scraper.User) error {
|
func (p Profile) SaveUser(u *scraper.User) error {
|
||||||
db := p.DB
|
if u.IsNeedingFakeID {
|
||||||
|
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
// We need to continue-- create a new fake user
|
||||||
|
u.ID = p.NextFakeUserID()
|
||||||
|
} else if err == nil {
|
||||||
|
// We're done; everything is fine (ID has already been scanned into the User)
|
||||||
|
return nil
|
||||||
|
} else {
|
||||||
|
// A real error occurred
|
||||||
|
panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_, err := db.Exec(`
|
_, err := p.DB.Exec(`
|
||||||
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded)
|
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake)
|
||||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
on conflict do update
|
on conflict do update
|
||||||
set bio=?,
|
set bio=?,
|
||||||
display_name=?,
|
display_name=?,
|
||||||
@ -37,7 +49,7 @@ func (p Profile) SaveUser(u scraper.User) error {
|
|||||||
pinned_tweet_id=?,
|
pinned_tweet_id=?,
|
||||||
is_content_downloaded=(is_content_downloaded or ?)
|
is_content_downloaded=(is_content_downloaded or ?)
|
||||||
`,
|
`,
|
||||||
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
|
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded, u.IsIdFake,
|
||||||
u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
|
u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -208,3 +220,16 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
|
|||||||
}
|
}
|
||||||
user.IsFollowed = is_followed
|
user.IsFollowed = is_followed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p Profile) NextFakeUserID() scraper.UserID {
|
||||||
|
_, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var ret scraper.UserID
|
||||||
|
err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
@ -3,11 +3,15 @@ package persistence_test
|
|||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
|
|
||||||
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -21,7 +25,7 @@ func TestSaveAndLoadUser(t *testing.T) {
|
|||||||
fake_user := create_dummy_user()
|
fake_user := create_dummy_user()
|
||||||
|
|
||||||
// Save the user, then reload it and ensure it's the same
|
// Save the user, then reload it and ensure it's the same
|
||||||
err := profile.SaveUser(fake_user)
|
err := profile.SaveUser(&fake_user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -64,7 +68,7 @@ func TestModifyUser(t *testing.T) {
|
|||||||
fake_user.IsContentDownloaded = true
|
fake_user.IsContentDownloaded = true
|
||||||
|
|
||||||
// Save the user so it can be modified
|
// Save the user so it can be modified
|
||||||
err := profile.SaveUser(fake_user)
|
err := profile.SaveUser(&fake_user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -81,7 +85,7 @@ func TestModifyUser(t *testing.T) {
|
|||||||
fake_user.IsContentDownloaded = false // test No Worsening
|
fake_user.IsContentDownloaded = false // test No Worsening
|
||||||
|
|
||||||
// Save the modified user
|
// Save the modified user
|
||||||
err = profile.SaveUser(fake_user)
|
err = profile.SaveUser(&fake_user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -150,7 +154,7 @@ func TestUserExists(t *testing.T) {
|
|||||||
if exists {
|
if exists {
|
||||||
t.Errorf("It shouldn't exist, but it does: %d", user.ID)
|
t.Errorf("It shouldn't exist, but it does: %d", user.ID)
|
||||||
}
|
}
|
||||||
err := profile.SaveUser(user)
|
err := profile.SaveUser(&user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -178,7 +182,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
|
|||||||
user.BannerImageUrl = "banner url1"
|
user.BannerImageUrl = "banner url1"
|
||||||
user.ProfileImageUrl = "profile url1"
|
user.ProfileImageUrl = "profile url1"
|
||||||
user.IsContentDownloaded = false
|
user.IsContentDownloaded = false
|
||||||
err := profile.SaveUser(user)
|
err := profile.SaveUser(&user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -190,7 +194,7 @@ func TestCheckUserContentDownloadNeeded(t *testing.T) {
|
|||||||
|
|
||||||
// Mark `is_content_downloaded` as "true" again
|
// Mark `is_content_downloaded` as "true" again
|
||||||
user.IsContentDownloaded = true
|
user.IsContentDownloaded = true
|
||||||
err = profile.SaveUser(user)
|
err = profile.SaveUser(&user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -225,7 +229,7 @@ func TestFollowUnfollowUser(t *testing.T) {
|
|||||||
|
|
||||||
user := create_dummy_user()
|
user := create_dummy_user()
|
||||||
assert.False(user.IsFollowed)
|
assert.False(user.IsFollowed)
|
||||||
err := profile.SaveUser(user)
|
err := profile.SaveUser(&user)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
profile.SetUserFollowed(&user, true)
|
profile.SetUserFollowed(&user, true)
|
||||||
@ -237,7 +241,7 @@ func TestFollowUnfollowUser(t *testing.T) {
|
|||||||
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
||||||
assert.True(user_reloaded.IsFollowed)
|
assert.True(user_reloaded.IsFollowed)
|
||||||
|
|
||||||
err = profile.SaveUser(user) // should NOT un-set is_followed
|
err = profile.SaveUser(&user) // should NOT un-set is_followed
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
user_reloaded, err = profile.GetUserByHandle(user.Handle)
|
user_reloaded, err = profile.GetUserByHandle(user.Handle)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -253,3 +257,62 @@ func TestFollowUnfollowUser(t *testing.T) {
|
|||||||
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
assert.Equal(user.ID, user_reloaded.ID) // Verify it's the same user
|
||||||
assert.False(user_reloaded.IsFollowed)
|
assert.False(user_reloaded.IsFollowed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should create a new Unknown User from the given handle.
|
||||||
|
* The Unknown User should work consistently with other Users.
|
||||||
|
*/
|
||||||
|
func TestCreateUnknownUserWithHandle(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestUserQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
next_id := profile.NextFakeUserID()
|
||||||
|
|
||||||
|
handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
|
||||||
|
user := scraper.GetUnknownUserWithHandle(handle)
|
||||||
|
assert.Equal(scraper.UserID(0), user.ID)
|
||||||
|
assert.True(user.IsIdFake)
|
||||||
|
|
||||||
|
err := profile.SaveUser(&user)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(scraper.UserID(next_id + 1), user.ID)
|
||||||
|
|
||||||
|
// Ensure the change was persisted
|
||||||
|
user_reloaded, err := profile.GetUserByHandle(user.Handle)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
|
||||||
|
assert.Equal(scraper.UserID(next_id + 1), user_reloaded.ID)
|
||||||
|
|
||||||
|
// Why not tack this test on here: make sure NextFakeUserID works as expected
|
||||||
|
assert.Equal(next_id + 2, profile.NextFakeUserID())
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should update the unknown User's UserID with the correct ID if it already exists
|
||||||
|
*/
|
||||||
|
func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestUserQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
user := create_stable_user()
|
||||||
|
|
||||||
|
|
||||||
|
unknown_user := scraper.GetUnknownUserWithHandle(user.Handle)
|
||||||
|
assert.Equal(scraper.UserID(0), unknown_user.ID)
|
||||||
|
|
||||||
|
err := profile.SaveUser(&unknown_user)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(user.ID, unknown_user.ID)
|
||||||
|
|
||||||
|
// The real user should not have been overwritten at all
|
||||||
|
user_reloaded, err := profile.GetUserByID(user.ID)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.False(user_reloaded.IsIdFake) // This one particularly
|
||||||
|
assert.Equal(user.Handle, user_reloaded.Handle)
|
||||||
|
assert.Equal(user.Bio, user_reloaded.Bio)
|
||||||
|
assert.Equal(user.DisplayName, user_reloaded.DisplayName)
|
||||||
|
}
|
||||||
|
@ -21,7 +21,8 @@ func create_or_load_profile(profile_path string) persistence.Profile {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = profile.SaveUser(create_stable_user())
|
u := create_stable_user()
|
||||||
|
err = profile.SaveUser(&u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
const ENGINE_DATABASE_VERSION = 9
|
const ENGINE_DATABASE_VERSION = 10
|
||||||
|
|
||||||
|
|
||||||
type VersionMismatchError struct {
|
type VersionMismatchError struct {
|
||||||
@ -62,6 +62,9 @@ var MIGRATIONS = []string{
|
|||||||
`alter table urls add column short_text text not null default ""`,
|
`alter table urls add column short_text text not null default ""`,
|
||||||
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
||||||
`alter table users add column is_followed boolean default 0`,
|
`alter table users add column is_followed boolean default 0`,
|
||||||
|
`create table fake_user_sequence(latest_fake_id integer not null);
|
||||||
|
insert into fake_user_sequence values(0x4000000000000000);
|
||||||
|
alter table users add column is_id_fake boolean default 0;`,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,6 +46,8 @@ type User struct {
|
|||||||
|
|
||||||
IsFollowed bool
|
IsFollowed bool
|
||||||
IsContentDownloaded bool
|
IsContentDownloaded bool
|
||||||
|
IsNeedingFakeID bool
|
||||||
|
IsIdFake bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u User) String() string {
|
func (u User) String() string {
|
||||||
@ -100,6 +102,27 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
|||||||
return UserHandle(matches[1]), nil
|
return UserHandle(matches[1]), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetUnknownUserWithHandle(handle UserHandle) User {
|
||||||
|
return User{
|
||||||
|
ID: UserID(0), // 2^62 + 1...
|
||||||
|
DisplayName: string(handle),
|
||||||
|
Handle: handle,
|
||||||
|
Bio: "<blank>",
|
||||||
|
FollowersCount: 0,
|
||||||
|
FollowingCount: 0,
|
||||||
|
Location: "<blank>",
|
||||||
|
Website:"<blank>",
|
||||||
|
JoinDate: time.Unix(0, 0),
|
||||||
|
IsVerified: false,
|
||||||
|
IsPrivate: true,
|
||||||
|
ProfileImageUrl: DEFAULT_PROFILE_IMAGE_URL,
|
||||||
|
ProfileImageLocalPath: path.Base(DEFAULT_PROFILE_IMAGE_URL),
|
||||||
|
BannerImageUrl: "",
|
||||||
|
BannerImageLocalPath: "",
|
||||||
|
IsNeedingFakeID: true,
|
||||||
|
IsIdFake: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user