diff --git a/scraper/api_types.go b/scraper/api_types.go index 3cff651..2bf9094 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -56,13 +56,13 @@ type APITweet struct { func (t *APITweet) NormalizeContent() { // Remove embedded links at the end of the text - if len(t.Entities.URLs) == 1 { + if len(t.Entities.URLs) == 1 { // TODO: should this be `>= 1`, like below? url := t.Entities.URLs[0].URL if strings.Index(t.FullText, url) == len(t.FullText) - len(url) { t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the newline } } - if len(t.Entities.Media) == 1 { + if len(t.Entities.Media) >= 1 { url := t.Entities.Media[0].URL if strings.Index(t.FullText, url) == len(t.FullText) - len(url) { t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the trailing space diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go index ba87439..4be68e1 100644 --- a/scraper/api_types_test.go +++ b/scraper/api_types_test.go @@ -17,6 +17,7 @@ func TestNormalizeContent(t *testing.T) { {"test_responses/tweet_with_gif_reply.json", ""}, {"test_responses/tweet_with_image.json", "this saddens me every time"}, {"test_responses/tweet_with_reply.json", "I always liked \"The Anarchist's Cookbook.\""}, + {"test_responses/tweet_with_4_images.json", "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"}, } for _, v := range test_cases { data, err := ioutil.ReadFile(v.filename) diff --git a/scraper/test_responses/tweet_with_4_images.json b/scraper/test_responses/tweet_with_4_images.json new file mode 100644 index 0000000..c63f587 --- /dev/null +++ b/scraper/test_responses/tweet_with_4_images.json @@ -0,0 +1 @@ +{"created_at":"Sat May 16 02:27:30 +0000 2020","id_str":"1261483383483293700","full_text":"These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do https://t.co/8nm627raPo","display_text_range":[0,151],"entities":{"media":[{"id_str":"1261483377363791872","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcrXUMAAiyCf.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcrXUMAAiyCf.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1914,"height":1456,"focus_rects":[{"x":0,"y":0,"h":1072,"w":1914},{"x":276,"y":0,"h":1456,"w":1456},{"x":366,"y":0,"h":1456,"w":1277},{"x":640,"y":0,"h":1456,"w":728},{"x":0,"y":0,"h":1456,"w":1914}]},"sizes":{"large":{"w":1914,"h":1456,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":517,"resize":"fit"},"medium":{"w":1200,"h":913,"resize":"fit"}}},{"id_str":"1261483377368039424","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcrYVAAAFY_U.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcrYVAAAFY_U.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1440,"height":960,"focus_rects":[{"x":0,"y":0,"h":806,"w":1440},{"x":276,"y":0,"h":960,"w":960},{"x":335,"y":0,"h":960,"w":842},{"x":516,"y":0,"h":960,"w":480},{"x":0,"y":0,"h":960,"w":1440}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":800,"resize":"fit"},"large":{"w":1440,"h":960,"resize":"fit"},"small":{"w":680,"h":453,"resize":"fit"}}},{"id_str":"1261483377409970177","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcriU0AEvGA1.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcriU0AEvGA1.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":620,"height":410,"focus_rects":[{"x":0,"y":59,"h":347,"w":620},{"x":119,"y":0,"h":410,"w":410},{"x":144,"y":0,"h":410,"w":360},{"x":222,"y":0,"h":410,"w":205},{"x":0,"y":0,"h":410,"w":620}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":620,"h":410,"resize":"fit"},"small":{"w":620,"h":410,"resize":"fit"},"medium":{"w":620,"h":410,"resize":"fit"}}},{"id_str":"1261483377519017984","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcr8UwAApzgz.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcr8UwAApzgz.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1200,"height":893,"focus_rects":[{"x":0,"y":0,"h":672,"w":1200},{"x":184,"y":0,"h":893,"w":893},{"x":239,"y":0,"h":893,"w":783},{"x":407,"y":0,"h":893,"w":447},{"x":0,"y":0,"h":893,"w":1200}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":893,"resize":"fit"},"large":{"w":1200,"h":893,"resize":"fit"},"small":{"w":680,"h":506,"resize":"fit"}}}]},"extended_entities":{"media":[{"id_str":"1261483377363791872","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcrXUMAAiyCf.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcrXUMAAiyCf.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1914,"height":1456,"focus_rects":[{"x":0,"y":0,"h":1072,"w":1914},{"x":276,"y":0,"h":1456,"w":1456},{"x":366,"y":0,"h":1456,"w":1277},{"x":640,"y":0,"h":1456,"w":728},{"x":0,"y":0,"h":1456,"w":1914}]},"sizes":{"large":{"w":1914,"h":1456,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":680,"h":517,"resize":"fit"},"medium":{"w":1200,"h":913,"resize":"fit"}},"media_key":"3_1261483377363791872","ext_media_color":{"palette":[{"rgb":{"red":166,"green":159,"blue":142},"percentage":27.88},{"rgb":{"red":46,"green":69,"blue":158},"percentage":18.35},{"rgb":{"red":37,"green":39,"blue":52},"percentage":13.46},{"rgb":{"red":147,"green":112,"blue":48},"percentage":13.0},{"rgb":{"red":109,"green":36,"blue":33},"percentage":8.57}]},"ext_alt_text":null,"ext_media_availability":{"status":"available"},"ext":{"mediaStats":{"r":"Missing","ttl":-1}}},{"id_str":"1261483377368039424","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcrYVAAAFY_U.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcrYVAAAFY_U.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1440,"height":960,"focus_rects":[{"x":0,"y":0,"h":806,"w":1440},{"x":276,"y":0,"h":960,"w":960},{"x":335,"y":0,"h":960,"w":842},{"x":516,"y":0,"h":960,"w":480},{"x":0,"y":0,"h":960,"w":1440}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":800,"resize":"fit"},"large":{"w":1440,"h":960,"resize":"fit"},"small":{"w":680,"h":453,"resize":"fit"}},"media_key":"3_1261483377368039424","ext_alt_text":null,"ext_media_availability":{"status":"available"},"ext_media_color":{"palette":[{"rgb":{"red":1,"green":10,"blue":32},"percentage":40.35},{"rgb":{"red":1,"green":21,"blue":62},"percentage":21.03},{"rgb":{"red":201,"green":169,"blue":161},"percentage":13.33},{"rgb":{"red":137,"green":45,"blue":24},"percentage":6.31},{"rgb":{"red":43,"green":34,"blue":32},"percentage":5.16}]},"ext":{"mediaStats":{"r":"Missing","ttl":-1}}},{"id_str":"1261483377409970177","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcriU0AEvGA1.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcriU0AEvGA1.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":620,"height":410,"focus_rects":[{"x":0,"y":59,"h":347,"w":620},{"x":119,"y":0,"h":410,"w":410},{"x":144,"y":0,"h":410,"w":360},{"x":222,"y":0,"h":410,"w":205},{"x":0,"y":0,"h":410,"w":620}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":620,"h":410,"resize":"fit"},"small":{"w":620,"h":410,"resize":"fit"},"medium":{"w":620,"h":410,"resize":"fit"}},"media_key":"3_1261483377409970177","ext_media_color":{"palette":[{"rgb":{"red":251,"green":251,"blue":251},"percentage":51.31},{"rgb":{"red":204,"green":139,"blue":135},"percentage":16.77},{"rgb":{"red":35,"green":39,"blue":42},"percentage":13.35},{"rgb":{"red":104,"green":62,"blue":56},"percentage":8.48},{"rgb":{"red":229,"green":168,"blue":144},"percentage":1.17}]},"ext_alt_text":null,"ext_media_availability":{"status":"available"},"ext":{"mediaStats":{"r":"Missing","ttl":-1}}},{"id_str":"1261483377519017984","indices":[152,175],"media_url":"http://pbs.twimg.com/media/EYGwcr8UwAApzgz.jpg","media_url_https":"https://pbs.twimg.com/media/EYGwcr8UwAApzgz.jpg","url":"https://t.co/8nm627raPo","display_url":"pic.twitter.com/8nm627raPo","expanded_url":"https://twitter.com/Denlesks/status/1261483383483293700/photo/1","type":"photo","original_info":{"width":1200,"height":893,"focus_rects":[{"x":0,"y":0,"h":672,"w":1200},{"x":184,"y":0,"h":893,"w":893},{"x":239,"y":0,"h":893,"w":783},{"x":407,"y":0,"h":893,"w":447},{"x":0,"y":0,"h":893,"w":1200}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":1200,"h":893,"resize":"fit"},"large":{"w":1200,"h":893,"resize":"fit"},"small":{"w":680,"h":506,"resize":"fit"}},"media_key":"3_1261483377519017984","ext_alt_text":null,"ext_media_availability":{"status":"available"},"ext_media_color":{"palette":[{"rgb":{"red":76,"green":30,"blue":32},"percentage":34.02},{"rgb":{"red":69,"green":73,"blue":120},"percentage":30.49},{"rgb":{"red":205,"green":192,"blue":195},"percentage":11.77},{"rgb":{"red":29,"green":23,"blue":22},"percentage":5.44},{"rgb":{"red":59,"green":72,"blue":201},"percentage":3.77}]},"ext":{"mediaStats":{"r":"Missing","ttl":-1}}}]},"source":"Twitter for iPhone","user_id_str":"2703181339","retweet_count":85,"favorite_count":242,"reply_count":41,"quote_count":20,"conversation_id_str":"1261483383483293700","possibly_sensitive_editable":true,"lang":"en"}