From c00ff06b3f3445a493853fc930682e0cad1f5012 Mon Sep 17 00:00:00 2001 From: Alessio Date: Tue, 1 Feb 2022 18:21:48 -0800 Subject: [PATCH] URLs need to have a TweetID lol --- scraper/api_types_v2.go | 1 + scraper/api_types_v2_test.go | 29 +++++++++++++++++++ .../api_v2/retweet_with_url.json | 1 + 3 files changed, 31 insertions(+) create mode 100644 scraper/test_responses/api_v2/retweet_with_url.json diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index a27bc3a..509b60e 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -149,6 +149,7 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove { if !ok { panic(fmt.Sprintf("Tweet trove didn't contain its own tweet: %d", api_result.Result.Legacy.ID)) } + url.TweetID = main_tweet.ID found := false for i := range main_tweet.Urls { if main_tweet.Urls[i].ShortText != url.ShortText { diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index 3ea497f..557dd60 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -291,6 +291,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) { assert.Equal(1, len(tweet.Urls)) url := tweet.Urls[0] + assert.Equal(tweet.ID, url.TweetID) assert.Equal("observer.com", url.Domain) assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title) assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text) @@ -325,6 +326,7 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) { assert.Equal(1, len(tweet.Urls)) url := tweet.Urls[0] + assert.Equal(tweet.ID, url.TweetID) assert.Equal("www.youtube.com", url.Domain) assert.Equal("Michael Malice on Kennedy Nov. 
15, 2016", url.Title) assert.Equal("https://www.youtube.com/watch?v=c9TypEM1ik4&t=9s", url.Text) @@ -333,6 +335,33 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) { assert.Equal(UserID(10228272), url.SiteID) } +/** + * Parse a retweet of a tweet containing a link; the link's TweetID must be the ID of the retweeted tweet + */ +func TestAPIV2ParseTweetWithURLRetweet(t *testing.T) { + assert := assert.New(t) + data, err := ioutil.ReadFile("test_responses/api_v2/retweet_with_url.json") + if err != nil { + panic(err) + } + + var tweet_result APIV2Result + err = json.Unmarshal(data, &tweet_result) + assert.NoError(err) + + trove := tweet_result.ToTweetTrove() + + assert.Equal(1, len(trove.Tweets)) + tweet, ok := trove.Tweets[1488605073588559873] + assert.True(ok) + assert.Equal("REJOICE", tweet.Text) + + assert.Equal(1, len(tweet.Urls)) + url := tweet.Urls[0] + assert.Equal(tweet.ID, url.TweetID) + assert.Equal("tippinsights.com", url.Domain) +} + func TestParseAPIV2UserFeed(t *testing.T) { data, err := ioutil.ReadFile("test_responses/api_v2/user_feed_apiv2.json") diff --git a/scraper/test_responses/api_v2/retweet_with_url.json b/scraper/test_responses/api_v2/retweet_with_url.json new file mode 100644 index 0000000..f7bded1 --- /dev/null +++ b/scraper/test_responses/api_v2/retweet_with_url.json @@ -0,0 +1 @@ +{"result":{"__typename":"Tweet","rest_id":"1488605340392374278","core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_nft_avatar":false,"legacy":{"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear 
Model","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":3858,"followers_count":339044,"friends_count":962,"has_custom_timelines":false,"is_translator":false,"listed_count":1443,"location":"Austin","media_count":9572,"name":"Michael Malice","normal_followers_count":339044,"pinned_tweet_ids_str":["1475160467035377670"],"profile_banner_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":60.59,"rgb":{"blue":0,"green":0,"red":0}},{"percentage":18.77,"rgb":{"blue":64,"green":60,"red":156}},{"percentage":3.62,"rgb":{"blue":31,"green":29,"red":77}},{"percentage":3.22,"rgb":{"blue":215,"green":199,"red":138}},{"percentage":2.83,"rgb":{"blue":85,"green":79,"red":215}}]}}}},"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1615134676","profile_image_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":50.78,"rgb":{"blue":249,"green":247,"red":246}},{"percentage":17.4,"rgb":{"blue":51,"green":51,"red":205}},{"percentage":9.43,"rgb":{"blue":124,"green":139,"red":210}},{"percentage":6.38,"rgb":{"blue":47,"green":63,"red":116}},{"percentage":3.17,"rgb":{"blue":65,"green":45,"red":46}}]}}}},"profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg","profile_interstitial_type":"","protected":false,"screen_name":"michaelmalice","statuses_count":139129,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":true,"withheld_in_countries":[]},"super_follow_eligible":false,"super_followed_by":false,"super_following":false}}},"card":{"rest_id":"https://t.co/YFABXRqcox","legacy":{"binding_values":[{"key":"photo_image_full_size_large","value":{"image_value":{"height":419,"width":800,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=800x419"},"type":"IMAGE"}},{"key":"thumbnail_image","value":
{"image_value":{"height":150,"width":225,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=280x150"},"type":"IMAGE"}},{"key":"description","value":{"string_value":"Conservatives and moderates see bias, liberals not so much.","type":"STRING"}},{"key":"domain","value":{"string_value":"tippinsights.com","type":"STRING"}},{"key":"thumbnail_image_large","value":{"image_value":{"height":320,"width":480,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=800x320_1"},"type":"IMAGE"}},{"key":"summary_photo_image_small","value":{"image_value":{"height":202,"width":386,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=386x202"},"type":"IMAGE"}},{"key":"thumbnail_image_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=orig"},"type":"IMAGE"}},{"key":"site","value":{"scribe_key":"publisher_id","type":"USER","user_value":{"id_str":"1366768672946204676","path":[]}}},{"key":"photo_image_full_size_small","value":{"image_value":{"height":202,"width":386,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=386x202"},"type":"IMAGE"}},{"key":"summary_photo_image_large","value":{"image_value":{"height":419,"width":800,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=800x419"},"type":"IMAGE"}},{"key":"thumbnail_image_small","value":{"image_value":{"height":67,"width":100,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=100x100"},"type":"IMAGE"}},{"key":"thumbnail_image_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"photo_image_full_size_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=
jpg&name=orig"},"type":"IMAGE"}},{"key":"vanity_url","value":{"scribe_key":"vanity_url","string_value":"tippinsights.com","type":"STRING"}},{"key":"photo_image_full_size","value":{"image_value":{"height":314,"width":600,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=600x314"},"type":"IMAGE"}},{"key":"thumbnail_image_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252},"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"title","value":{"string_value":"I&I/TIPP Poll: Trust In Media Collapse Continues As 2022 Begins","type":"STRING"}},{"key":"summary_photo_image_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252},"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"summary_photo_image_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"summary_photo_image","value":{"image_value":{"height":314,"width":600,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=600x314"},"type":"IMAGE"}},{"key":"photo_image_full_size_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252}
,"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"photo_image_full_size_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"card_url","value":{"scribe_key":"card_url","string_value":"https://t.co/YFABXRqcox","type":"STRING"}},{"key":"summary_photo_image_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=orig"},"type":"IMAGE"}}],"card_platform":{"platform":{"audience":{"name":"production"},"device":{"name":"Swift","version":"12"}}},"name":"summary_large_image","url":"https://t.co/YFABXRqcox","user_refs":[{"id":"VXNlcjoxMzY2NzY4NjcyOTQ2MjA0Njc2","rest_id":"1366768672946204676","affiliates_highlighted_label":{},"has_nft_avatar":false,"super_follow_eligible":false,"super_followed_by":false,"super_following":false}]}},"legacy":{"created_at":"Tue Feb 01 20:09:04 +0000 2022","conversation_id_str":"1488605340392374278","display_text_range":[0,63],"entities":{"user_mentions":[{"id_str":"44067298","name":"Michael Malice","screen_name":"michaelmalice","indices":[3,17]},{"id_str":"16076032","name":"Glenn Greenwald","screen_name":"ggreenwald","indices":[19,30]}],"urls":[{"display_url":"tippinsights.com/i-i-tipp-poll-…","expanded_url":"https://tippinsights.com/i-i-tipp-poll-trust-in-media-collapse-continues-as-2022-begins/","url":"https://t.co/YFABXRqcox","indices":[40,63]}],"hashtags":[],"symbols":[]},"favorite_count":0,"favorited":false,"full_text":"RT @michaelmalice: @ggreenwald REJOICE\n\nhttps://t.co/YFABXRqcox","is_quote_status":false,"lang":"en","possibly_sensitive":false,"possibly_sensitive_editable":true,"quote_count":0,"reply_count":0,"retweet_count":14,"retweeted":false,"source":"Twitter Web 
App","user_id_str":"44067298","id_str":"1488605340392374278","retweeted_status_result":{"result":{"__typename":"Tweet","rest_id":"1488605073588559873","core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_nft_avatar":false,"legacy":{"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":3858,"followers_count":339044,"friends_count":962,"has_custom_timelines":false,"is_translator":false,"listed_count":1443,"location":"Austin","media_count":9572,"name":"Michael Malice","normal_followers_count":339044,"pinned_tweet_ids_str":["1475160467035377670"],"profile_banner_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":60.59,"rgb":{"blue":0,"green":0,"red":0}},{"percentage":18.77,"rgb":{"blue":64,"green":60,"red":156}},{"percentage":3.62,"rgb":{"blue":31,"green":29,"red":77}},{"percentage":3.22,"rgb":{"blue":215,"green":199,"red":138}},{"percentage":2.83,"rgb":{"blue":85,"green":79,"red":215}}]}}}},"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1615134676","profile_image_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":50.78,"rgb":{"blue":249,"green":247,"red":246}},{"percentage":17.4,"rgb":{"blue":51,"green":51,"red":205}},{"percentage":9.43,"rgb":{"blue":124,"green":139,"red":210}},{"percentage":6.38,"rgb":{"blue":47,"green":63,"red":116}},{"percentage":3.17,"rgb":{"blue":65,"green":45,"red":46}}]}}}},"profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal
.jpg","profile_interstitial_type":"","protected":false,"screen_name":"michaelmalice","statuses_count":139129,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":true,"withheld_in_countries":[]},"super_follow_eligible":false,"super_followed_by":false,"super_following":false}}},"card":{"rest_id":"https://t.co/YFABXRqcox","legacy":{"binding_values":[{"key":"photo_image_full_size_large","value":{"image_value":{"height":419,"width":800,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=800x419"},"type":"IMAGE"}},{"key":"thumbnail_image","value":{"image_value":{"height":150,"width":225,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=280x150"},"type":"IMAGE"}},{"key":"description","value":{"string_value":"Conservatives and moderates see bias, liberals not so much.","type":"STRING"}},{"key":"domain","value":{"string_value":"tippinsights.com","type":"STRING"}},{"key":"thumbnail_image_large","value":{"image_value":{"height":320,"width":480,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=800x320_1"},"type":"IMAGE"}},{"key":"summary_photo_image_small","value":{"image_value":{"height":202,"width":386,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=386x202"},"type":"IMAGE"}},{"key":"thumbnail_image_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=orig"},"type":"IMAGE"}},{"key":"site","value":{"scribe_key":"publisher_id","type":"USER","user_value":{"id_str":"1366768672946204676","path":[]}}},{"key":"photo_image_full_size_small","value":{"image_value":{"height":202,"width":386,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=386x202"},"type":"IMAGE"}},{"key":"summary_photo_image_large","value":{"image_value":{"height":419,"width":800,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?forma
t=jpg&name=800x419"},"type":"IMAGE"}},{"key":"thumbnail_image_small","value":{"image_value":{"height":67,"width":100,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=100x100"},"type":"IMAGE"}},{"key":"thumbnail_image_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"photo_image_full_size_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=orig"},"type":"IMAGE"}},{"key":"vanity_url","value":{"scribe_key":"vanity_url","string_value":"tippinsights.com","type":"STRING"}},{"key":"photo_image_full_size","value":{"image_value":{"height":314,"width":600,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=600x314"},"type":"IMAGE"}},{"key":"thumbnail_image_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252},"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"title","value":{"string_value":"I&I/TIPP Poll: Trust In Media Collapse Continues As 2022 
Begins","type":"STRING"}},{"key":"summary_photo_image_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252},"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"summary_photo_image_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"summary_photo_image","value":{"image_value":{"height":314,"width":600,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=600x314"},"type":"IMAGE"}},{"key":"photo_image_full_size_color","value":{"image_color_value":{"palette":[{"rgb":{"blue":208,"green":200,"red":99},"percentage":83.45},{"rgb":{"blue":50,"green":45,"red":5},"percentage":5.69},{"rgb":{"blue":47,"green":229,"red":228},"percentage":3.94},{"rgb":{"blue":177,"green":168,"red":85},"percentage":1.96},{"rgb":{"blue":188,"green":218,"red":252},"percentage":1.46}]},"type":"IMAGE_COLOR"}},{"key":"photo_image_full_size_x_large","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=png&name=2048x2048_2_exp"},"type":"IMAGE"}},{"key":"card_url","value":{"scribe_key":"card_url","string_value":"https://t.co/YFABXRqcox","type":"STRING"}},{"key":"summary_photo_image_original","value":{"image_value":{"height":1333,"width":1999,"url":"https://pbs.twimg.com/card_img/1488141088586145794/ieT1uWUQ?format=jpg&name=orig"},"type":"IMAGE"}}],"card_platform":{"platform":{"audience":{"name":"production"},"device":{"name":"Swift","version":"12"}}},"name":"summary_large_image","url":"https://t.co/YFABXRqcox","user_refs":[{"id":"VXNlcjoxMzY2NzY4NjcyOTQ2MjA0Njc2","rest_id":"1366768672946204676","affiliates
_highlighted_label":{},"has_nft_avatar":false,"super_follow_eligible":false,"super_followed_by":false,"super_following":false}]}},"legacy":{"created_at":"Tue Feb 01 20:08:01 +0000 2022","conversation_id_str":"1488498439428227074","display_text_range":[12,44],"entities":{"user_mentions":[{"id_str":"16076032","name":"Glenn Greenwald","screen_name":"ggreenwald","indices":[0,11]}],"urls":[{"display_url":"tippinsights.com/i-i-tipp-poll-…","expanded_url":"https://tippinsights.com/i-i-tipp-poll-trust-in-media-collapse-continues-as-2022-begins/","url":"https://t.co/YFABXRqcox","indices":[21,44]}],"hashtags":[],"symbols":[]},"favorite_count":153,"favorited":false,"full_text":"@ggreenwald REJOICE\n\nhttps://t.co/YFABXRqcox","in_reply_to_screen_name":"ggreenwald","in_reply_to_status_id_str":"1488604452865167361","in_reply_to_user_id_str":"16076032","is_quote_status":false,"lang":"en","possibly_sensitive":false,"possibly_sensitive_editable":true,"quote_count":1,"reply_count":1,"retweet_count":14,"retweeted":false,"source":"Twitter Web App","user_id_str":"44067298","id_str":"1488605073588559873"}}}}}}