From 8892785aad21f8a1de5cc47039d8f89dc7ed7b84 Mon Sep 17 00:00:00 2001 From: Alessio Date: Wed, 2 Feb 2022 14:49:46 -0800 Subject: [PATCH] Add fetching of tombstoned users for APIv2 --- scraper/api_types_v2.go | 6 +++++- scraper/api_types_v2_test.go | 36 ++++++++++++++++++++++++++++++++++++ scraper/tweet.go | 2 ++ scraper/user_feed.go | 29 ++++++++++++++++++++++++++++- 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index 5bbf7b8..c3ece34 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -122,7 +122,11 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove { // Quoted tweets might be tombstones! if quoted_api_result.Result.Tombstone != nil { tombstoned_tweet := &quoted_api_result.Result.Legacy.APITweet - tombstoned_tweet.TombstoneText = quoted_api_result.Result.Tombstone.Text.Text + var ok bool + tombstoned_tweet.TombstoneText, ok = tombstone_types[quoted_api_result.Result.Tombstone.Text.Text] + if !ok { + panic(fmt.Sprintf("Unknown tombstone text: %s", quoted_api_result.Result.Tombstone.Text.Text)) + } tombstoned_tweet.ID = int64(int_or_panic(api_result.Result.Legacy.APITweet.QuotedStatusIDStr)) handle, err := ParseHandleFromTweetUrl(api_result.Result.Legacy.APITweet.QuotedStatusPermalink.ExpandedURL) if err != nil { diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index a4d0634..ad9ea8b 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -268,6 +268,42 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) { } +/** + * Parse tweet with quoted tombstone + */ +func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) { + assert := assert.New(t) + data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_quoted_tombstone.json") + if err != nil { + panic(err) + } + + var tweet_result APIV2Result + err = json.Unmarshal(data, &tweet_result) + assert.NoError(err) + + trove := tweet_result.ToTweetTrove() + + assert.Equal(1, 
len(trove.Users)) + user, ok := trove.Users[44067298] + assert.True(ok) + assert.Equal(UserHandle("michaelmalice"), user.Handle) + + assert.Equal(1, len(trove.TombstoneUsers)) + assert.Contains(trove.TombstoneUsers, UserHandle("coltnkat")) + + assert.Equal(2, len(trove.Tweets)) + tombstoned_tweet, ok := trove.Tweets[1485774025347371008] + assert.True(ok) + assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID) + assert.Equal("no longer exists", tombstoned_tweet.TombstoneType) + assert.True (tombstoned_tweet.IsStub) + assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle) + + assert.Equal(0, len(trove.Retweets)) +} + + /** * Parse a tweet with a link */ diff --git a/scraper/tweet.go b/scraper/tweet.go index b4c7a59..3d7a07b 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -15,6 +15,7 @@ type TweetID int64 type Tweet struct { ID TweetID UserID UserID + UserHandle UserHandle // For processing tombstones User *User Text string PostedAt time.Time @@ -85,6 +86,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { ret.ID = TweetID(apiTweet.ID) ret.UserID = UserID(apiTweet.UserID) + ret.UserHandle = UserHandle(apiTweet.UserHandle) ret.Text = apiTweet.FullText // Process "posted-at" date and time diff --git a/scraper/user_feed.go b/scraper/user_feed.go index 078681f..cea79ec 100644 --- a/scraper/user_feed.go +++ b/scraper/user_feed.go @@ -48,5 +48,32 @@ func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, er } - return api_response.ToTweetTrove() + trove, err = api_response.ToTweetTrove() + if err != nil { + panic(err) + } + + // DUPE tombstone-user-processing + fmt.Println("------------") + for _, handle := range trove.TombstoneUsers { + fmt.Println(handle) + + user, err := GetUser(handle) + if err != nil { + panic(err) + } + fmt.Println(user) + + if user.ID == 0 { + panic(fmt.Sprintf("UserID == 0 (@%s)", handle)) + } + + trove.Users[user.ID] = user + } + // Quoted tombstones need their user_id filled out 
from the tombstoned_users list + trove.FillMissingUserIDs() + + // <<<<<<< DUPE tombstone-user-processing + + return trove, nil }