diff --git a/cmd/tests.sh b/cmd/tests.sh
index 047cb95..27caf27 100755
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -255,15 +255,25 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count  #
 
 
 # Test a tweet thread with tombstones
-# tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
-# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
-# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0  # TODO this guy got banned
-# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
+tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
+test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
+test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 1
+test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
+test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454526270809726977") = 0
+# Check that it downloaded the fetchable user's profile image
+test $(find profile_images/itsbackwereover_profile* | wc -l) -ne 0
+
+
+# Test an expanding ("Show more") tweet
+tw fetch_tweet https://twitter.com/PaulSkallas/status/1649600354747572225
+test $(sqlite3 twitter.db "select is_expandable from tweets where id = 1649600354747572225") = 1
+test $(sqlite3 twitter.db "select length(text) from tweets where id = 1649600354747572225") -gt 280
+test "$(sqlite3 twitter.db "select text from tweets where id = 1649600354747572225" | tail -n 1)" = "A fitting ending to a time not worth saving"
 
 
 # Test updating a tombstone (e.g., the QT-ing user is blocked but acct is not priv)
 tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085
-test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "4|"
+test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "3|"
 tw fetch_tweet_only 1479540319410696192  # Should remove the tombstone type and update the text
 test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person."
 
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index d7d5d99..6404ac5 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -236,7 +236,8 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
 		die(err.Error(), false, -1)
 	}
 
-	trove, err := scraper.GetTweetFull(tweet_id, how_many)
+	// Fetch through the GraphQL "TweetDetail" API; this replaces the previous scraper.GetTweetFull call
+	trove, err := scraper.GetTweetFullAPIV2(tweet_id, how_many)
 	if err != nil {
 		die(err.Error(), false, -1)
 	}
diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go
index a019402..58871da 100644
--- a/scraper/api_types_v2.go
+++ b/scraper/api_types_v2.go
@@ -680,6 +680,22 @@ func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
 	return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
 }
 
+/**
+ * Build the URL for the GraphQL "TweetDetail" endpoint, with `tweet_id` as the focal tweet.
+ * A non-empty `cursor` is query-escaped and added to the request variables; an empty one is left out.
+ */
+func get_tweet_detail_url(tweet_id TweetID, cursor string) string {
+	maybe_cursor := ""
+	if cursor != "" {
+		maybe_cursor = "%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C"
+	}
+	return "https://twitter.com/i/api/graphql/tPRAv4UnqM9dOgDWggph7Q/TweetDetail?variables=%7B%22focalTweetId%22%3A%22" + fmt.Sprint(tweet_id) + "%22%2C" + maybe_cursor + "%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
+	// Example of a full request URL with a cursor, split into pieces (it also has a "referrer" variable):
+	// Request URL: "https://twitter.com/i/api/graphql/Pn68XRZwyV9ClrAEmK8rrQ/TweetDetail?variables=%7B%22focalTweetId%22%3A%221649600354747572225
+	// "%22%2C%22cursor%22%3A%22WwAAAPANHBmWgICz2dr56OQtgICw7cSjr-UtkICw1fzkyBIA8BK-0YTco-UtioC-0ePcx-QthICw-aSBz-QtgoC-1fffiuUkAPAHya3Ty-QtgMCz6cS-lOUtJQISFQQAAA%22%2C%22referrer%22%3A%22tweet
+	// "%22%2C%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D
+}
+
 /**
  * Get a User feed using the new GraphQL twitter api
  */
@@ -739,3 +755,56 @@ func (api *API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Resp
 	}
 	return nil
 }
+
+/**
+ * Fetch one page of the "TweetDetail" conversation around `tweet_id`.
+ * `cursor` selects the page; pass "" to request without a cursor.
+ */
+func (api *API) GetTweetDetail(tweet_id TweetID, cursor string) (APIV2Response, error) {
+	var response APIV2Response
+
+	// The local isn't called `url`, so that it doesn't shadow the `url` package
+	detail_url, err := url.Parse(get_tweet_detail_url(tweet_id, cursor))
+	if err != nil {
+		return response, fmt.Errorf("Error parsing tweet detail URL (tweet id %d):\n %w", tweet_id, err)
+	}
+
+	err = api.do_http(detail_url.String(), cursor, &response)
+	return response, err
+}
+
+/**
+ * Follow the bottom cursor of `response`, appending each new page's entries onto it, until it
+ * holds at least `min_tweets` entries or there is no bottom cursor left to follow.
+ *
+ * Returns END_OF_FEED when a page comes back empty; request errors are returned unchanged.
+ */
+func (api *API) GetMoreTweetReplies(tweet_id TweetID, response *APIV2Response, min_tweets int) error {
+	last_response := response
+	for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < min_tweets {
+		fresh_response, err := api.GetTweetDetail(tweet_id, last_response.GetCursorBottom())
+		if err != nil {
+			return err
+		}
+
+		if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() && len(fresh_response.GetMainInstruction().Entries) == 0 {
+			// Empty response, cursor same as previous: end of the replies has been reached
+			return END_OF_FEED
+		}
+		if fresh_response.IsEmpty() {
+			// `IsEmpty()` (presumably) means this page has no content: treat it as the end of the replies
+			// TODO: check that the request didn't just fail
+			return END_OF_FEED
+		}
+
+		last_response = &fresh_response
+
+		// Copy over the entries
+		response.GetMainInstruction().Entries = append(
+			response.GetMainInstruction().Entries,
+			last_response.GetMainInstruction().Entries...)
+
+		fmt.Printf("Have %d entries so far\n", len(response.GetMainInstruction().Entries))
+	}
+	return nil
+}
diff --git a/scraper/tweet.go b/scraper/tweet.go
index 0dcc86a..83e3acd 100644
--- a/scraper/tweet.go
+++ b/scraper/tweet.go
@@ -316,3 +316,48 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
 
 	return
 }
+
+/**
+ * Get a tweet and its conversation using the GraphQL "TweetDetail" API, fetching further pages of
+ * replies until at least `how_many` entries are collected or the replies run out, as a TweetTrove.
+ *
+ * The main tweet in the trove gets `IsConversationScraped` set and `LastScrapedAt` set to now.
+ */
+func GetTweetFullAPIV2(id TweetID, how_many int) (trove TweetTrove, err error) {
+	resp, err := the_api.GetTweetDetail(id, "")
+	if err != nil {
+		err = fmt.Errorf("Error getting tweet detail: %d\n %w", id, err)
+		return
+	}
+	err = the_api.GetMoreTweetReplies(id, &resp, how_many)
+	// END_OF_FEED is not treated as a failure; what was fetched so far is still used
+	if err != nil && !errors.Is(err, END_OF_FEED) {
+		err = fmt.Errorf("Error getting more replies in tweet detail: %d\n %w", id, err)
+		return
+	}
+	trove, err = resp.ToTweetTrove()
+	if err != nil {
+		return trove, err
+	}
+
+	// Quoted tombstones need their user_id filled out from the tombstoned_users list
+	log.Debug("Running tweet trove post-processing\n")
+	err = trove.PostProcess()
+	if err != nil {
+		err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
+		return
+	}
+
+	// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
+	tweet, ok := trove.Tweets[id]
+	if !ok {
+		// The trove is built from a fetched response, so report a missing main tweet instead of panicking
+		err = fmt.Errorf("Trove didn't contain its own tweet (id %d)", id)
+		return
+	}
+	tweet.LastScrapedAt = Timestamp{time.Now()}
+	tweet.IsConversationScraped = true
+	trove.Tweets[id] = tweet
+
+	return
+}