From df8093bbd9c540025d37eb6e5d8769d0cb39807c Mon Sep 17 00:00:00 2001 From: Alessio Date: Tue, 26 Dec 2023 19:52:37 -0600 Subject: [PATCH] Add follows --- cmd/tests.sh | 9 +++ cmd/twitter/main.go | 34 +++++++++++ doc/graphql_processing.py | 24 ++++++++ pkg/persistence/follow_queries.go | 58 +++++++++++++++++++ pkg/persistence/follow_queries_test.go | 55 ++++++++++++++++++ pkg/persistence/schema.sql | 12 ++++ pkg/scraper/api_graphql_utils.go | 2 + pkg/scraper/api_types_v2.go | 17 +++++- .../test_responses/lists/followees.json | 1 + sample_data/seed_data.sql | 14 +++++ 10 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 doc/graphql_processing.py create mode 100644 pkg/persistence/follow_queries.go create mode 100644 pkg/persistence/follow_queries_test.go create mode 100644 pkg/scraper/test_responses/lists/followees.json diff --git a/cmd/tests.sh b/cmd/tests.sh index 342c70d..3b536e1 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -380,6 +380,15 @@ test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id # Test fetch a DM conversation tw fetch_dm "1458284524761075714-1488963321701171204" + +# Test followers and followees +test $(sqlite3 twitter.db "select count(*) from follows") = "0" +tw get_followees Offline_Twatter +test $(sqlite3 twitter.db "select count(*) from follows where follower_id = 1488963321701171204") = "4" +test $(sqlite3 twitter.db "select count(*) from follows where followee_id = 1488963321701171204") = "0" +tw get_followers Offline_Twatter +test $(sqlite3 twitter.db "select count(*) from follows where follower_id = 1488963321701171204 and followee_id = 759251") = "1" + # TODO: Maybe this file should be broken up into multiple test scripts echo -e "\033[32mAll tests passed. 
Finished successfully.\033[0m"
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index a753490..616058f 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -135,6 +135,10 @@ func main() {
 		fetch_user_feed(target, 999999999)
 	case "get_user_likes":
 		get_user_likes(target, *how_many)
+	case "get_followers":
+		get_followers(target, *how_many)
+	case "get_followees":
+		get_followees(target, *how_many)
 	case "fetch_timeline":
 		fetch_timeline(false)
 	case "fetch_timeline_for_you":
@@ -310,6 +314,40 @@ func get_user_likes(handle string, how_many int) {
 	happy_exit(fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)))
 }
 
+// Scrape the accounts that `handle` follows, save them, and record each one as a followee of `handle`
+func get_followees(handle string, how_many int) {
+	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
+	if err != nil {
+		die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
+	}
+
+	trove, err := scraper.GetFollowees(user.ID, how_many)
+	if err != nil {
+		die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
+	}
+	profile.SaveTweetTrove(trove, true)
+	profile.SaveAsFolloweesList(user.ID, trove)
+
+	happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)))
+}
+
+// Scrape the accounts that follow `handle`, save them, and record each one as a follower of `handle`
+func get_followers(handle string, how_many int) {
+	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
+	if err != nil {
+		die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
+	}
+
+	trove, err := scraper.GetFollowers(user.ID, how_many)
+	if err != nil {
+		die(fmt.Sprintf("Error getting followers: %s\n %s", handle, err.Error()), false, -2)
+	}
+	profile.SaveTweetTrove(trove, true)
+	profile.SaveAsFollowersList(user.ID, trove)
+
+	happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)))
+}
+
 func fetch_timeline(is_for_you bool) {
 	trove, err := scraper.GetHomeTimeline("", is_for_you)
 	if err != nil {
diff --git a/doc/graphql_processing.py b/doc/graphql_processing.py
new file mode 100644
index 0000000..5f0162f
--- /dev/null +++ b/doc/graphql_processing.py @@ -0,0 +1,24 @@ +import urllib +import urllib.parse as parse +import json + +x = "https://twitter.com/i/api/graphql/3_7xfjmh897x8h_n6QBqTA/Followers?variables=%7B%22userId%22%3A%221488963321701171204%22%2C%22count%22%3A20%2C%22includePromotedContent%22%3Afalse%7D&features=%7B%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22c9s_tweet_anatomy_moderator_badge_enabled%22%3Atrue%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22responsive_web_twitter_article_tweet_consumption_enabled%22%3Afalse%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Atrue%2C%22rweb_video_timestamps_enabled%22%3Atrue%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Atrue%2C%22responsive_web_media_download_video_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D" +parsed_url = parse.urlparse(x) + +base_url = parsed_url._replace(query="").geturl() + +gql_vars = json.loads(parse.parse_qs(parsed_url.query)["variables"][0]) +gql_feats = json.loads(parse.parse_qs(parsed_url.query)["features"][0]) + +def snake_to_camel(s): + return "".join(x.capitalize() for x in s.split("_")) + +print("BaseUrl: \"{}\",".format(base_url)) +print("Variables: GraphqlVariables{") +for k, v in 
gql_vars.items():
+    print("\t{}: {},".format(snake_to_camel(k), json.dumps(v)))
+print("},")
+print("Features: GraphqlFeatures{")
+for k, v in gql_feats.items():
+    print("\t{}: {},".format(snake_to_camel(k), json.dumps(v)))
+print("},")
diff --git a/pkg/persistence/follow_queries.go b/pkg/persistence/follow_queries.go
new file mode 100644
index 0000000..0cd41cf
--- /dev/null
+++ b/pkg/persistence/follow_queries.go
@@ -0,0 +1,71 @@
+package persistence
+
+import (
+	"fmt"
+
+	. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+)
+
+// Record that `follower_id` follows `followee_id`.  The insert is a no-op if that follow is already saved.
+// Panics on a DB error.
+func (p Profile) SaveFollow(follower_id UserID, followee_id UserID) {
+	fmt.Printf("Saving %d => %d\n", follower_id, followee_id)
+	_, err := p.DB.Exec(`
+		insert into follows (follower_id, followee_id)
+		values (?, ?)
+		on conflict do nothing
+	`, follower_id, followee_id)
+	if err != nil {
+		panic(err)
+	}
+}
+
+// Save every user in the trove as a follower of `followee_id`
+func (p Profile) SaveAsFollowersList(followee_id UserID, trove TweetTrove) {
+	for follower_id := range trove.Users {
+		p.SaveFollow(follower_id, followee_id)
+	}
+}
+
+// Save every user in the trove as a followee of `follower_id` (i.e., `follower_id` follows them)
+func (p Profile) SaveAsFolloweesList(follower_id UserID, trove TweetTrove) {
+	for followee_id := range trove.Users {
+		p.SaveFollow(follower_id, followee_id)
+	}
+}
+
+// Returns true if the first user follows the second user, false otherwise.  Panics on a DB error.
+func (p Profile) IsXFollowingY(follower_id UserID, followee_id UserID) bool {
+	rows, err := p.DB.Query(`select 1 from follows where follower_id = ? and followee_id = ?`, follower_id, followee_id)
+	if err != nil {
+		panic(err)
+	}
+	defer rows.Close()
+
+	is_following := rows.Next() // true if there is a row, false otherwise
+	// `Next` also returns false when iteration fails, so check for an error before trusting the result
+	if err = rows.Err(); err != nil {
+		panic(err)
+	}
+	return is_following
+}
+
+// Get the IDs of all saved followers of `followee_id`.  Panics on a DB error.
+func (p Profile) GetFollowers(followee_id UserID) []UserID {
+	var ret []UserID
+	err := p.DB.Select(&ret, `select follower_id from follows where followee_id = ?`, followee_id)
+	if err != nil {
+		panic(err)
+	}
+	return ret
+}
+
+// Get the IDs of all saved users that `follower_id` follows.  Panics on a DB error.
+func (p Profile) GetFollowees(follower_id UserID) []UserID {
+	var ret []UserID
+	err := p.DB.Select(&ret, `select followee_id from follows where follower_id = ?`, follower_id)
+	if err != nil {
+		panic(err)
+	}
+	return ret
+}
diff --git a/pkg/persistence/follow_queries_test.go b/pkg/persistence/follow_queries_test.go
new file mode 100644
index 0000000..dbadea4
--- /dev/null
+++ b/pkg/persistence/follow_queries_test.go
@@ -0,0 +1,57 @@
+package persistence_test
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
+	. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+)
+
+// Saving a trove as a followees list should make exactly those user IDs come back from `GetFollowees`
+func TestSaveAndLoadFollows(t *testing.T) {
+	require := require.New(t)
+	assert := assert.New(t)
+
+	profile, err := persistence.LoadProfile("../../sample_data/profile")
+	require.NoError(err)
+
+	follower := create_dummy_user()
+	require.NoError(profile.SaveUser(&follower))
+
+	followee_ids := []UserID{
+		1427250806378672134,
+		1304281147074064385,
+		887434912529338375,
+		836779281049014272,
+		1032468021485293568,
+	}
+	trove := NewTweetTrove()
+	for _, id := range followee_ids {
+		trove.Users[id] = User{}
+	}
+
+	// Save and reload it
+	profile.SaveAsFolloweesList(follower.ID, trove)
+	new_followee_ids := profile.GetFollowees(follower.ID)
+
+	assert.Len(new_followee_ids, len(followee_ids))
+	for _, id := range new_followee_ids {
+		_, is_ok := trove.Users[id]
+		assert.True(is_ok)
+	}
+}
+
+// Follows are directional: the seed data has 1178839081222115328 following 1488963321701171204, but not the reverse
+func TestIsFollowing(t *testing.T) {
+	require := require.New(t)
+	assert := assert.New(t)
+
+	profile, err := persistence.LoadProfile("../../sample_data/profile")
+	require.NoError(err)
+
+	assert.True(profile.IsXFollowingY(UserID(1178839081222115328), UserID(1488963321701171204)))
+	assert.False(profile.IsXFollowingY(UserID(1488963321701171204), UserID(1178839081222115328)))
+}
diff --git a/pkg/persistence/schema.sql b/pkg/persistence/schema.sql
index 08939a0..82be948 100644
--- a/pkg/persistence/schema.sql
+++ b/pkg/persistence/schema.sql
@@ -203,6 +203,18 @@ create table likes(rowid integer primary key,
 create index if not exists index_likes_user_id on likes (user_id);
 create index if not exists index_likes_tweet_id on likes (tweet_id);
 
+
+create table follows(rowid integer primary key,
+    follower_id integer not null,
+    followee_id integer not null,
+    unique(follower_id, followee_id),
+    foreign key(follower_id) references users(id)
+    foreign key(followee_id) references users(id)
+);
+create index if not exists index_follows_followee_id on follows (followee_id);
+create index if not exists
index_follows_follower_id on follows (follower_id); + + create table fake_user_sequence(latest_fake_id integer not null); insert into fake_user_sequence values(0x4000000000000000); diff --git a/pkg/scraper/api_graphql_utils.go b/pkg/scraper/api_graphql_utils.go index 7c072c7..3c47067 100644 --- a/pkg/scraper/api_graphql_utils.go +++ b/pkg/scraper/api_graphql_utils.go @@ -68,6 +68,8 @@ type GraphqlFeatures struct { HiddenProfileSubscriptionsEnabled bool `json:"hidden_profile_subscriptions_enabled"` HighlightsTweetsTabUIEnabled bool `json:"highlights_tweets_tab_ui_enabled"` SubscriptionsVerificationInfoIsIdentityVerifiedEnabled bool `json:"subscriptions_verification_info_is_identity_verified_enabled"` //nolint:lll // I didn't choose this field name + C9sTweetAnatomyModeratorBadgeEnabled bool `json:"c9s_tweet_anatomy_moderator_badge_enabled"` + RwebVideoTimestampsEnabled bool `json:"rweb_video_timestamps_enabled"` // Spaces Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"` diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index 4c82559..bd02493 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -406,6 +406,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() (TweetTrove, error) { type ItemContent struct { ItemType string `json:"itemType"` TweetResults APIV2Result `json:"tweet_results"` + APIV2UserResult // Cursors (conversation view format) CursorType string `json:"cursorType"` @@ -548,7 +549,13 @@ func (e APIV2Entry) ToTweetTrove() TweetTrove { } ret.Tweets[parsed_tombstone_tweet.ID] = parsed_tombstone_tweet } else if err != nil { - panic(err) + if e.Content.ItemContent.APIV2UserResult.UserResults.Result.ID != 0 { + user := e.Content.ItemContent.APIV2UserResult.ToUser() + ret = NewTweetTrove() + ret.Users[user.ID] = user + } else { + panic(err) + } } return ret } @@ -793,7 +800,13 @@ func (r APIV2Response) ToTweetTroveAsLikes() (TweetTrove, error) { // Generate a "Like" from the entry tweet, is_ok := 
ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result._Result.ID)] if !is_ok { - panic(entry) + // For TweetWithVisibilityResults + tweet, is_ok = ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result.Tweet.ID)] + if !is_ok { + log.Warnf("ID: %d", entry.Content.ItemContent.TweetResults.Result._Result.ID) + log.Warnf("Entry JSON: %s", entry.OriginalJSON) + panic(ret.Tweets) + } } ret.Likes[LikeSortID(entry.SortIndex)] = Like{ SortID: LikeSortID(entry.SortIndex), diff --git a/pkg/scraper/test_responses/lists/followees.json b/pkg/scraper/test_responses/lists/followees.json new file mode 100644 index 0000000..fb6c256 --- /dev/null +++ b/pkg/scraper/test_responses/lists/followees.json @@ -0,0 +1 @@ +{"data":{"user":{"result":{"__typename":"User","timeline":{"timeline":{"instructions":[{"type":"TimelineClearCache"},{"type":"TimelineTerminateTimeline","direction":"Top"},{"type":"TimelineTerminateTimeline","direction":"Bottom"},{"type":"TimelineAddEntries","entries":[{"entryId":"user-1458284524761075714","sortIndex":"1739810405452087296","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineUser","__typename":"TimelineUser","user_results":{"result":{"__typename":"User","id":"VXNlcjoxNDU4Mjg0NTI0NzYxMDc1NzE0","rest_id":"1458284524761075714","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":false,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":false,"created_at":"Wed Nov 10 04:05:16 +0000 2021","default_profile":true,"default_profile_image":false,"description":"~wispem-wantex\n\nCurrently looking for work (DMs 
open)","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"offline-twitter.com","expanded_url":"https://offline-twitter.com/","url":"https://t.co/7nDTwkz7Hh","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":18359,"followers_count":521,"friends_count":151,"has_custom_timelines":false,"is_translator":false,"listed_count":11,"location":"on my computer","media_count":435,"name":"wispem-wantex","needs_phone_verification":false,"normal_followers_count":521,"pinned_tweet_ids_str":["1723014479200547105"],"possibly_sensitive":false,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1724933823144620032/sYTzWQy2_normal.jpg","profile_interstitial_type":"","screen_name":"wispem_wantex","statuses_count":6881,"translator_type":"none","url":"https://t.co/7nDTwkz7Hh","verified":false,"want_retweets":false,"withheld_in_countries":[]}}},"userDisplayType":"User"},"clientEventInfo":{"component":"FollowingSgs","element":"user"}}},{"entryId":"user-1349149096909668363","sortIndex":"1739810405452087295","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineUser","__typename":"TimelineUser","user_results":{"result":{"__typename":"User","id":"VXNlcjoxMzQ5MTQ5MDk2OTA5NjY4MzYz","rest_id":"1349149096909668363","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":false,"can_media_tag":false,"created_at":"Wed Jan 13 00:37:08 +0000 2021","default_profile":true,"default_profile_image":false,"description":"46th President of the United States, husband to @FLOTUS, proud dad & pop. 
Tweets may be archived: https://t.co/HDhBZBkKpU Text me: (302) 404-0880","entities":{"description":{"urls":[{"display_url":"whitehouse.gov/privacy","expanded_url":"http://whitehouse.gov/privacy","url":"https://t.co/HDhBZBkKpU","indices":[98,121]}]},"url":{"urls":[{"display_url":"wh.gov/resources","expanded_url":"http://wh.gov/resources","url":"https://t.co/GMISXhI3kd","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":17,"followers_count":33782394,"friends_count":5,"has_custom_timelines":true,"is_translator":false,"listed_count":26440,"location":"","media_count":3072,"name":"President Biden","normal_followers_count":33782394,"pinned_tweet_ids_str":[],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/1349149096909668363/1701459164","profile_image_url_https":"https://pbs.twimg.com/profile_images/1380530524779859970/TfwVAbyX_normal.jpg","profile_interstitial_type":"","screen_name":"POTUS","statuses_count":8102,"translator_type":"none","url":"https://t.co/GMISXhI3kd","verified":false,"verified_type":"Government","want_retweets":false,"withheld_in_countries":[]}}},"userDisplayType":"User"},"clientEventInfo":{"component":"FollowingSgs","element":"user"}}},{"entryId":"user-759251","sortIndex":"1739810405452087294","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineUser","__typename":"TimelineUser","user_results":{"result":{"__typename":"User","id":"VXNlcjo3NTkyNTE=","rest_id":"759251","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Square","legacy":{"can_dm":false,"can_media_tag":false,"created_at":"Fri Feb 09 00:35:02 +0000 2007","default_profile":false,"default_profile_image":false,"description":"It’s our job to #GoThere & tell the most difficult stories. 
For breaking news, follow @CNNBRK and download our app https://t.co/ceNBoNi8y6","entities":{"description":{"urls":[{"display_url":"cnn.com/apps","expanded_url":"http://cnn.com/apps","url":"https://t.co/ceNBoNi8y6","indices":[115,138]}]},"url":{"urls":[{"display_url":"cnn.com","expanded_url":"http://www.cnn.com","url":"https://t.co/imGp4Ieixi","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":1367,"followers_count":62070240,"friends_count":1084,"has_custom_timelines":true,"is_translator":false,"listed_count":158487,"location":"","media_count":190288,"name":"CNN","normal_followers_count":62070240,"pinned_tweet_ids_str":[],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/759251/1689257013","profile_image_url_https":"https://pbs.twimg.com/profile_images/1669780523911110656/K9gaaM1g_normal.jpg","profile_interstitial_type":"","screen_name":"CNN","statuses_count":420907,"translator_type":"regular","url":"https://t.co/imGp4Ieixi","verified":false,"verified_type":"Business","want_retweets":false,"withheld_in_countries":[]}}},"userDisplayType":"User"},"clientEventInfo":{"component":"FollowingSgs","element":"user"}}},{"entryId":"user-26257166","sortIndex":"1739810405452087293","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineUser","__typename":"TimelineUser","user_results":{"result":{"__typename":"User","id":"VXNlcjoyNjI1NzE2Ng==","rest_id":"26257166","affiliates_highlighted_label":{"label":{"url":{"url":"https://twitter.com/espn","urlType":"DeepLink"},"badge":{"url":"https://pbs.twimg.com/profile_images/1170690523201527808/FriNRiir_bigger.png"},"description":"ESPN","userLabelType":"BusinessLabel","userLabelDisplayType":"Badge"}},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Square","legacy":{"can_dm":false,"can_media_tag":false,"created_at":"Tue Mar 24 15:28:02 +0000 
2009","default_profile":false,"default_profile_image":false,"description":"Download the ESPN App ⬇️","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"espn.com/espn/apps/espn","expanded_url":"http://www.espn.com/espn/apps/espn","url":"https://t.co/e5h1o3ZZfJ","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":2564,"followers_count":42542085,"friends_count":685,"has_custom_timelines":true,"is_translator":false,"listed_count":45061,"location":"","media_count":67923,"name":"SportsCenter","normal_followers_count":42542085,"pinned_tweet_ids_str":[],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/26257166/1688578885","profile_image_url_https":"https://pbs.twimg.com/profile_images/1604943357750419465/wk5BAmKG_normal.png","profile_interstitial_type":"","screen_name":"SportsCenter","statuses_count":144664,"translator_type":"regular","url":"https://t.co/e5h1o3ZZfJ","verified":false,"verified_type":"Business","want_retweets":false,"withheld_in_countries":[]}}},"userDisplayType":"User"},"clientEventInfo":{"component":"FollowingSgs","element":"user"}}},{"entryId":"cursor-bottom-1739810405452087292","sortIndex":"1739810405452087292","content":{"entryType":"TimelineTimelineCursor","__typename":"TimelineTimelineCursor","value":"0|1739810405452087290","cursorType":"Bottom"}},{"entryId":"cursor-top-1739810405452087297","sortIndex":"1739810405452087297","content":{"entryType":"TimelineTimelineCursor","__typename":"TimelineTimelineCursor","value":"-1|1739810405452087297","cursorType":"Top"}}]}]}}}}}} diff --git a/sample_data/seed_data.sql b/sample_data/seed_data.sql index b4578b4..5707806 100644 --- a/sample_data/seed_data.sql +++ b/sample_data/seed_data.sql @@ -410,6 +410,20 @@ INSERT INTO chat_message_reactions VALUES (3,1665936253834578774,1665936253483614216,1178839081222115328,1686075343331,'🤔'); +create table follows(rowid integer primary key, + follower_id integer not null, + followee_id integer not null, + 
unique(follower_id, followee_id), + foreign key(follower_id) references users(id) + foreign key(followee_id) references users(id) +); +create index if not exists index_follows_followee_id on follows (followee_id); +create index if not exists index_follows_follower_id on follows (follower_id); +insert into follows values + (1, 1178839081222115328, 1488963321701171204), + (2, 1032468021485293568, 1488963321701171204); + + create table fake_user_sequence(latest_fake_id integer not null); insert into fake_user_sequence values(0x4000000000000000);