diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index 0e9fd6b..99b7547 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -557,6 +557,8 @@ func (e APIV2Entry) ToTweetTrove() TweetTrove { panic(err) } + // We don't have any user info yet. We may be able to reconstruct some later with reply-joining + // But for now, it's just "Unknown User" fake_user := GetUnknownUser() ret.Users[fake_user.ID] = fake_user parsed_tombstone_tweet.UserID = fake_user.ID @@ -784,8 +786,10 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { if replied_tweet.UserID == 0 || replied_tweet.UserID == GetUnknownUser().ID { replied_tweet.UserID = tweet.InReplyToUserID if replied_tweet.UserID == 0 || replied_tweet.UserID == GetUnknownUser().ID { + // We know absolutely nothing about them; can't determine a UserID or handle + // Create a dummy user just so the Tweet will have a non-0 user ID. fake_user := GetUnknownUser() - ret.Users[fake_user.ID] = fake_user + ret.Users[fake_user.ID] = fake_user // Make sure the dummy user appears in the Trove replied_tweet.UserID = fake_user.ID } } // replied_tweet.UserID should now be a real UserID @@ -797,6 +801,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { if existing_user.Handle == "" { existing_user.Handle = tweet.InReplyToUserHandle } + // If the replied tweet is a "user was suspended" tombstone, it can be inferred that the + // user must be suspended + if replied_tweet.TombstoneType == "suspended" { + existing_user.IsBanned = true + } ret.Users[replied_tweet.UserID] = existing_user ret.TombstoneUsers = append(ret.TombstoneUsers, existing_user.Handle) diff --git a/pkg/scraper/api_types_v2_test.go b/pkg/scraper/api_types_v2_test.go index 7d752a0..09e90e0 100644 --- a/pkg/scraper/api_types_v2_test.go +++ b/pkg/scraper/api_types_v2_test.go @@ -1013,3 +1013,36 @@ func TestNoFailOnComposerEntryInRegularThread(t *testing.T) { assert.Len(trove.Tweets, 3) } + +// 
TestParseTweetThreadWithBannedUser parses a thread containing two "suspended account" tombstones and checks they are attributed to the replied-to user, who is then marked banned.
+func TestParseTweetThreadWithBannedUser(t *testing.T) {
+	assert := assert.New(t)
+	require := require.New(t)
+	data, err := os.ReadFile("test_responses/api_v2/tweet_from_banned_user.json")
+	require.NoError(err)
+	var api_response APIV2Response
+	err = json.Unmarshal(data, &api_response)
+	require.NoError(err)
+
+	trove, err := api_response.ToTweetTrove()
+	require.NoError(err)
+
+	assert.Len(trove.Tweets, 6) // fixture: 3 regular tweets, 1 quoted tweet, 2 tombstones
+	banned_tweet1, is_ok := trove.Tweets[1709579269247234284]
+	require.True(is_ok) // stop here on a miss; the checks below would only inspect a zero-value Tweet
+	assert.True(banned_tweet1.IsStub)
+	assert.Equal(UserID(1595500307374829568), banned_tweet1.UserID) // ID comes from the reply's in_reply_to_user_id_str
+	assert.Equal("suspended", banned_tweet1.TombstoneType)
+
+	banned_tweet2, is_ok := trove.Tweets[1709580398026805736]
+	require.True(is_ok)
+	assert.True(banned_tweet2.IsStub)
+	assert.Equal(UserID(1595500307374829568), banned_tweet2.UserID)
+	assert.Equal("suspended", banned_tweet2.TombstoneType)
+
+	// TODO(review): assert.Len(trove.Users, 3) is disabled; presumably the "Unknown User" placeholder is still in the trove - confirm.
+	banned_user, is_ok := trove.Users[1595500307374829568]
+	require.True(is_ok)
+	assert.Equal(UserHandle("spandrell3"), banned_user.Handle) // handle comes from the reply's in_reply_to_screen_name
+	assert.True(banned_user.IsBanned)                           // inferred from the "suspended" tombstone type
+}
diff --git a/pkg/scraper/test_responses/api_v2/tweet_from_banned_user.json b/pkg/scraper/test_responses/api_v2/tweet_from_banned_user.json
new file mode 100644
index 0000000..d1a9d40
--- /dev/null
+++ b/pkg/scraper/test_responses/api_v2/tweet_from_banned_user.json
@@ -0,0 +1 @@
+{"data":{"threaded_conversation_with_injections_v2":{"instructions":[{"type":"TimelineAddEntries","entries":[{"entryId":"tweet-1709547389722210722","sortIndex":"7513824647132565085","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1709547389722210722","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxMDYzODA0MjYwMzY4ODA1ODkw","rest_id":"1063804260368805890","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":true,"created_at":"Sat Nov 17 14:41:15 +0000 2018","default_profile":true,"default_profile_image":false,"description":"WRITER/DIRECTOR/Angel (Network)/Angel(VC)/ SURGEON/CHEMIST __GARDENER IN CHIEF __HYPERTHYMIC CYBORG __LOVE MAXI __SPECIAL OPERATIONS GROUP @Blimp_DAO","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"leafyfang.substack.com","expanded_url":"http://leafyfang.substack.com","url":"https://t.co/QpJvATk1yR","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":5665,"followers_count":1626,"friends_count":971,"has_custom_timelines":true,"is_translator":false,"listed_count":38,"location":"Australia","media_count":165,"name":"Dr. 
Oskar Sarkon","normal_followers_count":1626,"pinned_tweet_ids_str":["1824232777769984099"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/1063804260368805890/1598354942","profile_image_url_https":"https://pbs.twimg.com/profile_images/1559231638822760450/TBfALnv0_normal.jpg","profile_interstitial_type":"","screen_name":"hastuc_dibtux","statuses_count":2518,"translator_type":"none","url":"https://t.co/QpJvATk1yR","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1709547389722210722"],"editable_until_msecs":"1696426434000","is_edit_eligible":true,"edits_remaining":"5"},"is_translatable":false,"views":{"count":"818","state":"EnabledWithCount"},"source":"Twitter for iPhone","quoted_status_result":{"result":{"__typename":"Tweet","rest_id":"1709411596064444880","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxNzA5MzkzNDY3ODcxNDYxMzc2","rest_id":"1709393467871461376","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":false,"can_media_tag":true,"created_at":"Wed Oct 04 02:23:01 +0000 2023","default_profile":true,"default_profile_image":false,"description":"Anglotheosophical Oblique 
Escalation\nzerophilosophy@substack.com","entities":{"description":{"urls":[]}},"fast_followers_count":0,"favourites_count":49152,"followers_count":24670,"friends_count":662,"has_custom_timelines":false,"is_translator":false,"listed_count":137,"location":"","media_count":60,"name":"Xenocosmography","normal_followers_count":24670,"pinned_tweet_ids_str":[],"possibly_sensitive":false,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1709394184837451776/4xkaiTS7_normal.jpg","profile_interstitial_type":"","screen_name":"xenocosmography","statuses_count":8776,"translator_type":"none","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1709411596064444880"],"editable_until_msecs":"1696394058000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"count":"1920","state":"EnabledWithCount"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Wed Oct 04 03:34:18 +0000 2023","conversation_id_str":"1709411596064444880","display_text_range":[0,45],"entities":{"user_mentions":[{"id_str":"1852899224","name":"Outsideness","screen_name":"Outsideness","indices":[0,12]}],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":19,"favorited":false,"full_text":"@Outsideness is gone for good (or something).","in_reply_to_screen_name":"Outsideness","in_reply_to_user_id_str":"1852899224","is_quote_status":false,"lang":"en","quote_count":1,"reply_count":2,"retweet_count":1,"retweeted":false,"user_id_str":"1709393467871461376","id_str":"1709411596064444880"}}},"legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Wed Oct 04 12:33:54 +0000 2023","conversation_id_str":"1709547389722210722","display_text_range":[0,16],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":13,"favorited":false,"full_text":"Urbit fixes 
this","is_quote_status":true,"lang":"en","quote_count":0,"quoted_status_id_str":"1709411596064444880","quoted_status_permalink":{"url":"https://t.co/EJ4vIYreCT","expanded":"https://twitter.com/xenocosmography/status/1709411596064444880","display":"x.com/xenocosmograph…"},"reply_count":1,"retweet_count":0,"retweeted":false,"user_id_str":"1063804260368805890","id_str":"1709547389722210722"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"Tweet","hasModeratedReplies":false}}},{"entryId":"tweet-1709579269247234284","sortIndex":"7513792767607541523","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Post is from a suspended account. Learn more","entities":[{"fromIndex":39,"toIndex":49,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}},"tweetDisplayType":"Tweet"}}},{"entryId":"tweet-1709579508469076051","sortIndex":"7513792528385699756","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1709579508469076051","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxMDYzODA0MjYwMzY4ODA1ODkw","rest_id":"1063804260368805890","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":true,"created_at":"Sat Nov 17 14:41:15 +0000 2018","default_profile":true,"default_profile_image":false,"description":"WRITER/DIRECTOR/Angel (Network)/Angel(VC)/ SURGEON/CHEMIST __GARDENER IN CHIEF __HYPERTHYMIC CYBORG __LOVE MAXI __SPECIAL OPERATIONS 
GROUP @Blimp_DAO","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"leafyfang.substack.com","expanded_url":"http://leafyfang.substack.com","url":"https://t.co/QpJvATk1yR","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":5665,"followers_count":1626,"friends_count":971,"has_custom_timelines":true,"is_translator":false,"listed_count":38,"location":"Australia","media_count":165,"name":"Dr. Oskar Sarkon","normal_followers_count":1626,"pinned_tweet_ids_str":["1824232777769984099"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/1063804260368805890/1598354942","profile_image_url_https":"https://pbs.twimg.com/profile_images/1559231638822760450/TBfALnv0_normal.jpg","profile_interstitial_type":"","screen_name":"hastuc_dibtux","statuses_count":2518,"translator_type":"none","url":"https://t.co/QpJvATk1yR","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1709579508469076051"],"editable_until_msecs":"1696434091000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"count":"106","state":"EnabledWithCount"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Wed Oct 04 14:41:31 +0000 2023","conversation_id_str":"1709547389722210722","display_text_range":[12,121],"entities":{"user_mentions":[{"id_str":"1595500307374829568","name":"Spandrell","screen_name":"spandrell3","indices":[0,11]}],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":1,"favorited":false,"full_text":"@spandrell3 yeah, making user accounts legible as cryptoassets lends itself perfectly to social recovery multisig 
wallets","in_reply_to_screen_name":"spandrell3","in_reply_to_status_id_str":"1709579269247234284","in_reply_to_user_id_str":"1595500307374829568","is_quote_status":false,"lang":"en","quote_count":0,"reply_count":1,"retweet_count":0,"retweeted":false,"user_id_str":"1063804260368805890","id_str":"1709579508469076051"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"Tweet"}}},{"entryId":"tweet-1709580398026805736","sortIndex":"7513791638827970071","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Post is from a suspended account. Learn more","entities":[{"fromIndex":39,"toIndex":49,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}},"tweetDisplayType":"Tweet"}}},{"entryId":"conversationthread-1709580993886068820","sortIndex":"7513791638827970061","content":{"entryType":"TimelineTimelineModule","__typename":"TimelineTimelineModule","items":[{"entryId":"conversationthread-1709580993886068820-tweet-1709580993886068820","item":{"itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1709580993886068820","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxMDYzODA0MjYwMzY4ODA1ODkw","rest_id":"1063804260368805890","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":true,"can_media_tag":true,"created_at":"Sat Nov 17 14:41:15 +0000 2018","default_profile":true,"default_profile_image":false,"description":"WRITER/DIRECTOR/Angel (Network)/Angel(VC)/ SURGEON/CHEMIST __GARDENER IN CHIEF __HYPERTHYMIC CYBORG __LOVE MAXI __SPECIAL 
OPERATIONS GROUP @Blimp_DAO","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"leafyfang.substack.com","expanded_url":"http://leafyfang.substack.com","url":"https://t.co/QpJvATk1yR","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":5665,"followers_count":1626,"friends_count":971,"has_custom_timelines":true,"is_translator":false,"listed_count":38,"location":"Australia","media_count":165,"name":"Dr. Oskar Sarkon","normal_followers_count":1626,"pinned_tweet_ids_str":["1824232777769984099"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/1063804260368805890/1598354942","profile_image_url_https":"https://pbs.twimg.com/profile_images/1559231638822760450/TBfALnv0_normal.jpg","profile_interstitial_type":"","screen_name":"hastuc_dibtux","statuses_count":2518,"translator_type":"none","url":"https://t.co/QpJvATk1yR","verified":false,"want_retweets":false,"withheld_in_countries":[]}}}},"card":{"rest_id":"https://t.co/AG3g3JePH9","legacy":{"binding_values":[{"key":"description","value":{"string_value":"In this article, we're going to dive into the benefits and challenges social recovery wallets present, and see how they're changing the world of digital asset management.","type":"STRING"}},{"key":"domain","value":{"string_value":"originstamp.com","type":"STRING"}},{"key":"vanity_url","value":{"scribe_key":"vanity_url","string_value":"originstamp.com","type":"STRING"}},{"key":"title","value":{"string_value":"What is a Social Recovery 
Wallet?","type":"STRING"}},{"key":"card_url","value":{"scribe_key":"card_url","string_value":"https://t.co/AG3g3JePH9","type":"STRING"}}],"card_platform":{"platform":{"audience":{"name":"production"},"device":{"name":"Swift","version":"12"}}},"name":"summary","url":"https://t.co/AG3g3JePH9","user_refs_results":[]}},"unmention_data":{},"edit_control":{"edit_tweet_ids":["1709580993886068820"],"editable_until_msecs":"1696434446000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"count":"70","state":"EnabledWithCount"},"source":"Twitter Web App","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Wed Oct 04 14:47:26 +0000 2023","conversation_id_str":"1709547389722210722","display_text_range":[12,214],"entities":{"user_mentions":[{"id_str":"1595500307374829568","name":"Spandrell","screen_name":"spandrell3","indices":[0,11]}],"urls":[{"display_url":"originstamp.com/blog/what-is-a…","expanded_url":"https://originstamp.com/blog/what-is-a-social-recovery-wallet/#how-does-social-recovery-wallet-work","url":"https://t.co/AG3g3JePH9","indices":[191,214]}],"hashtags":[],"symbols":[]},"favorite_count":1,"favorited":false,"full_text":"@spandrell3 like the only way to make self-custody boomerproof. 
basically, you elect other people to act as guardians, and a n-of-m majority of the guardians can vote to reissue your wallet.\nhttps://t.co/AG3g3JePH9","in_reply_to_screen_name":"spandrell3","in_reply_to_status_id_str":"1709580398026805736","in_reply_to_user_id_str":"1595500307374829568","is_quote_status":false,"lang":"en","possibly_sensitive":false,"possibly_sensitive_editable":true,"quote_count":0,"reply_count":0,"retweet_count":0,"retweeted":false,"user_id_str":"1063804260368805890","id_str":"1709580993886068820"},"quick_promote_eligibility":{"eligibility":"IneligibleNotProfessional"}}},"tweetDisplayType":"Tweet"},"clientEventInfo":{"details":{"conversationDetails":{"conversationSection":"HighQuality"},"timelinesDetails":{"controllerData":"DAACDAAEDAABCgABDSACCEAEgQEKAAIAAAAAGCBACAAAAAA="}}}}}],"displayType":"VerticalConversation","clientEventInfo":{"details":{"conversationDetails":{"conversationSection":"HighQuality"}}}}}]},{"type":"TimelineTerminateTimeline","direction":"Top"}]}}}