Switch over Tweet Detail fetching to use APIv2

This commit is contained in:
Alessio 2023-06-21 13:26:18 -03:00
parent eae3ebe3e7
commit a44323c5de
4 changed files with 105 additions and 6 deletions

View File

@ -255,15 +255,25 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count #
# Test a tweet thread with tombstones # Test a tweet thread with tombstones
# tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977 tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1 test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 1
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1 test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454526270809726977") = 0
# Check that it downloaded the fetchable user's profile image
test $(find profile_images/itsbackwereover_profile* | wc -l) -ne 0
# Test an expanding ("Show more") tweet
tw fetch_tweet https://twitter.com/PaulSkallas/status/1649600354747572225
test $(sqlite3 twitter.db "select is_expandable from tweets where id = 1649600354747572225") = 1
test $(sqlite3 twitter.db "select length(text) from tweets where id = 1649600354747572225") -gt 280
test "$(sqlite3 twitter.db "select text from tweets where id = 1649600354747572225" | tail -n 1)" = "A fitting ending to a time not worth saving"
# Test updating a tombstone (e.g., the QT-ing user is blocked but acct is not priv) # Test updating a tombstone (e.g., the QT-ing user is blocked but acct is not priv)
tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085 tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "4|" test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "3|"
tw fetch_tweet_only 1479540319410696192 # Should remove the tombstone type and update the text tw fetch_tweet_only 1479540319410696192 # Should remove the tombstone type and update the text
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person." test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person."

View File

@ -236,7 +236,8 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
die(err.Error(), false, -1) die(err.Error(), false, -1)
} }
trove, err := scraper.GetTweetFull(tweet_id, how_many) //trove, err := scraper.GetTweetFull(tweet_id, how_many)
trove, err := scraper.GetTweetFullAPIV2(tweet_id, how_many)
if err != nil { if err != nil {
die(err.Error(), false, -1) die(err.Error(), false, -1)
} }

View File

@ -680,6 +680,17 @@ func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
} }
/**
 * Build the request URL for the GraphQL "TweetDetail" endpoint, using `tweet_id` as the focal tweet.
 *
 * args:
 * - tweet_id: the tweet to fetch; rendered with `fmt.Sprint` into the "focalTweetId" variable
 * - cursor: if non-empty, it is query-escaped and sent as a "cursor" variable placed right after
 *   "focalTweetId"; if empty, no "cursor" variable is sent at all
 *
 * returns: the full URL, including the URL-encoded `variables` and `features` JSON objects
 *
 * For reference: a request captured from the web client used the query ID "Pn68XRZwyV9ClrAEmK8rrQ"
 * and also sent `"referrer":"tweet"` after the cursor.  Neither of those is reproduced here.
 */
func get_tweet_detail_url(tweet_id TweetID, cursor string) string {
	const (
		endpoint = "https://twitter.com/i/api/graphql/tPRAv4UnqM9dOgDWggph7Q/TweetDetail"

		// URL-encoded `{"focalTweetId":"`; the tweet ID and the closing `",` are appended after it
		variables_head = "?variables=%7B%22focalTweetId%22%3A%22"

		// Remaining fields of the `variables` object, up to and including the closing `}`
		variables_tail = "%22with_rux_injections%22%3Afalse%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Atrue%7D" //nolint:lll // It's a URL, come on

		// The whole `features` query parameter
		features = "&features=%7B%22rweb_lists_timeline_redesign_enabled%22%3Atrue%2C%22responsive_web_graphql_exclude_directive_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22creator_subscriptions_tweet_preview_api_enabled%22%3Atrue%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22responsive_web_graphql_skip_user_profile_image_extensions_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22view_counts_everywhere_api_enabled%22%3Atrue%2C%22longform_notetweets_consumption_enabled%22%3Atrue%2C%22tweet_awards_web_tipping_enabled%22%3Afalse%2C%22freedom_of_speech_not_reach_fetch_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22longform_notetweets_rich_text_read_enabled%22%3Atrue%2C%22longform_notetweets_inline_media_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
	)

	// URL-encoded `"cursor":"<cursor>",`, or nothing when there is no cursor
	cursor_field := ""
	if cursor != "" {
		cursor_field = "%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C"
	}

	return endpoint + variables_head + fmt.Sprint(tweet_id) + "%22%2C" + cursor_field + variables_tail + features
}
/** /**
* Get a User feed using the new GraphQL twitter api * Get a User feed using the new GraphQL twitter api
*/ */
@ -739,3 +750,44 @@ func (api *API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Resp
} }
return nil return nil
} }
/**
 * Fetch a single page of the "TweetDetail" GraphQL response for the given tweet.
 *
 * args:
 * - tweet_id: the focal tweet
 * - cursor: pagination cursor; it is used to build the URL and is also passed through to `do_http`.
 *   An empty string requests the first page.
 *
 * returns: the response that `do_http` decoded into, along with any error from `do_http`.  If the
 * generated URL cannot be parsed, an error is returned and no request is made.
 */
func (api *API) GetTweetDetail(tweet_id TweetID, cursor string) (APIV2Response, error) {
	var response APIV2Response

	// Don't call this variable `url`; that would shadow the `net/url` package
	detail_url, err := url.Parse(get_tweet_detail_url(tweet_id, cursor))
	if err != nil {
		// This function already reports failures through its error result, so do that rather than panic
		return response, fmt.Errorf("Error parsing tweet detail URL for tweet %d:\n  %w", tweet_id, err)
	}

	err = api.do_http(detail_url.String(), cursor, &response)
	return response, err
}
/**
 * Keep fetching further pages of a tweet detail and appending their entries onto `response`.
 *
 * args:
 * - tweet_id: the focal tweet, passed to `GetTweetDetail` for every page
 * - response: the response fetched so far; each new page's main-instruction entries are appended
 *   to this response's main-instruction entries (it is modified in place)
 * - min_tweets: stop once `response` holds at least this many entries
 *
 * returns:
 * - nil, if the most recent page has no bottom cursor or enough entries have been collected
 * - END_OF_FEED, if a fetched page repeats the previous bottom cursor with zero entries, or if it
 *   reports itself as empty; that page is not appended
 * - any error from `GetTweetDetail`, unmodified
 */
func (api *API) GetMoreTweetReplies(tweet_id TweetID, response *APIV2Response, min_tweets int) error {
	// The most recently fetched page; its bottom cursor is where the next request continues from
	previous := response

	for {
		cursor := previous.GetCursorBottom()
		if cursor == "" || len(response.GetMainInstruction().Entries) >= min_tweets {
			return nil
		}

		page, err := api.GetTweetDetail(tweet_id, cursor)
		if err != nil {
			return err
		}

		// Same cursor as last time and nothing new: the end of the feed has been reached
		is_repeat := page.GetCursorBottom() == cursor && len(page.GetMainInstruction().Entries) == 0
		// TODO: an empty page could also mean the request just failed; that isn't distinguished here
		if is_repeat || page.IsEmpty() {
			return END_OF_FEED
		}

		previous = &page

		// Copy over the entries
		merged := append(response.GetMainInstruction().Entries, page.GetMainInstruction().Entries...)
		response.GetMainInstruction().Entries = merged
		fmt.Printf("Have %d entries so far\n", len(response.GetMainInstruction().Entries))
	}
}

View File

@ -316,3 +316,39 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
return return
} }
/**
 * Fetch a tweet and its conversation using the tweet detail API, and convert the result to a TweetTrove.
 *
 * args:
 * - id: the ID of the tweet to fetch
 * - how_many: passed to `GetMoreTweetReplies` as the minimum number of entries to collect
 *
 * returns: the post-processed trove, in which the tweet `id` has `LastScrapedAt` set to the
 * current time and `IsConversationScraped` set to true.
 *
 * Hitting END_OF_FEED while fetching more replies is not treated as an error.
 * Panics if the resulting trove doesn't contain the tweet `id`.
 */
func GetTweetFullAPIV2(id TweetID, how_many int) (trove TweetTrove, err error) {
	// First page of the tweet detail
	response, err := the_api.GetTweetDetail(id, "")
	if err != nil {
		return trove, fmt.Errorf("Error getting tweet detail: %d\n %w", id, err)
	}

	// Subsequent pages; running out of feed just means there's nothing more to get
	if more_err := the_api.GetMoreTweetReplies(id, &response, how_many); more_err != nil && !errors.Is(more_err, END_OF_FEED) {
		return trove, fmt.Errorf("Error getting more replies in tweet detail: %d\n %w", id, more_err)
	}

	trove, err = response.ToTweetTrove()
	if err != nil {
		return trove, err
	}

	// Quoted tombstones need their user_id filled out from the tombstoned_users list
	log.Debug("Running tweet trove post-processing\n")
	if err = trove.PostProcess(); err != nil {
		return trove, fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
	}

	// Mark the main tweet as having had its conversation scraped just now
	main_tweet, is_present := trove.Tweets[id]
	if !is_present {
		panic("Trove didn't contain its own tweet!")
	}
	main_tweet.LastScrapedAt = Timestamp{time.Now()}
	main_tweet.IsConversationScraped = true
	trove.Tweets[id] = main_tweet

	return trove, nil
}