From d0cb857acb09777cc6a73b1af88d55c4ff18505d Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 22 Nov 2021 14:52:18 -0800 Subject: [PATCH] Fix embedded link parsing for youtube links with no thumbnails --- ...rl_card_with_player_placeholder_image.json | 1 + scraper/url.go | 3 ++ scraper/url_test.go | 44 +++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 scraper/test_responses/tweet_content/url_card_with_player_placeholder_image.json diff --git a/scraper/test_responses/tweet_content/url_card_with_player_placeholder_image.json b/scraper/test_responses/tweet_content/url_card_with_player_placeholder_image.json new file mode 100644 index 0000000..d226fe6 --- /dev/null +++ b/scraper/test_responses/tweet_content/url_card_with_player_placeholder_image.json @@ -0,0 +1 @@ +{"name":"player","url":"https://t.co/eLeWxR4inp","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"vanity_url":{"type":"STRING","string_value":"youtube.com","scribe_key":"vanity_url"},"player_url":{"type":"STRING","string_value":"https://www.youtube.com/embed/i6Ffrx0MtTA"},"app_is_free":{"type":"STRING","string_value":"true"},"app_price_currency":{"type":"STRING","string_value":"USD"},"app_price_amount":{"type":"STRING","string_value":"0.0"},"domain":{"type":"STRING","string_value":"www.youtube.com"},"app_num_ratings":{"type":"STRING","string_value":"23,259,600"},"app_star_rating":{"type":"STRING","string_value":"4.68617"},"app_name":{"type":"STRING","string_value":"YouTube: Watch, Listen, Stream"},"player_width":{"type":"STRING","string_value":"1280"},"player_height":{"type":"STRING","string_value":"720"},"site":{"type":"USER","user_value":{"id_str":"10228272","path":[]},"scribe_key":"publisher_id"},"title":{"type":"STRING","string_value":"Did Michael Malice Turn Me into an Anarchist? | Ep 181"},"description":{"type":"STRING","string_value":"SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..."},"player_image_small":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/cards/player-placeholder.png","width":351,"height":197}},"player_image":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/cards/player-placeholder.png","width":351,"height":197}},"player_image_large":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/cards/player-placeholder.png","width":351,"height":197}},"player_image_x_large":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/cards/player-placeholder.png","width":351,"height":197}},"card_url":{"type":"STRING","string_value":"https://t.co/eLeWxR4inp","scribe_key":"card_url"}},"users":{"10228272":{"id_str":"10228272","name":"YouTube","screen_name":"YouTube","location":"San Bruno, CA","description":"like and subscribe.","url":"https://t.co/O2OOS6R6Eg","entities":{"url":{"urls":[{"url":"https://t.co/O2OOS6R6Eg","expanded_url":"https://www.youtube.com/c/sustainability/videos","display_url":"youtube.com/c/sustainabili…","indices":[0,23]}]},"description":{}},"followers_count":73714588,"fast_followers_count":0,"normal_followers_count":73714588,"friends_count":1201,"listed_count":79703,"created_at":"Tue Nov 13 21:43:46 +0000 2007","favourites_count":5828,"verified":true,"statuses_count":39056,"media_count":13284,"is_translation_enabled":true,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1427292844612595720/RC1YSvuT_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/10228272/1635870163","profile_image_extensions_alt_text":null,"profile_image_extensions_media_availability":null,"profile_image_extensions_media_color":{"palette":[{"rgb":{"red":255,"green":255,"blue":255},"percentage":70.88},{"rgb":{"red":254,"green":0,"blue":0},"percentage":27.69},{"rgb":{"red":252,"green":180,"blue":178},"percentage":0.86},{"rgb":{"red":254,"green":87,"blue":90},"percentage":0.37},{"rgb":{"red":250,"green":132,"blue":130},"percentage":0.15}]},"profile_image_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_banner_extensions_alt_text":null,"profile_banner_extensions_media_availability":null,"profile_banner_extensions_media_color":{"palette":[{"rgb":{"red":212,"green":206,"blue":219},"percentage":33.47},{"rgb":{"red":153,"green":179,"blue":230},"percentage":8.85},{"rgb":{"red":207,"green":182,"blue":141},"percentage":5.41},{"rgb":{"red":221,"green":166,"blue":185},"percentage":4.36},{"rgb":{"red":163,"green":201,"blue":188},"percentage":3.42}]},"profile_banner_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_link_color":"C9191D","pinned_tweet_ids":[],"pinned_tweet_ids_str":[],"has_custom_timelines":true,"advertiser_account_type":"promotable_user","advertiser_account_service_levels":["dso","dso","dso","dso","dso","dso","dso","dso","dso"],"profile_interstitial_type":"","business_profile_state":"none","translator_type":"regular","withheld_in_countries":[],"ext":{"highlightedLabel":{"r":{"ok":{}},"ttl":-1}}}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}} diff --git a/scraper/url.go b/scraper/url.go index 49e543b..3a8eb3b 100644 --- a/scraper/url.go +++ b/scraper/url.go @@ -63,6 +63,9 @@ func get_thumbnail_local_path(remote_url string) string { if err != nil { panic(err) } + if u.RawQuery == "" { + return path.Base(u.Path) + } query_params, err := url.ParseQuery(u.RawQuery) if err != nil { panic(err) diff --git a/scraper/url_test.go b/scraper/url_test.go index 5ea9b08..48971c4 100644 --- a/scraper/url_test.go +++ b/scraper/url_test.go @@ -108,6 +108,50 @@ func TestParseAPIUrlCardWithPlayer(t *testing.T) { } } +func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) { + data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") + if err != nil { + panic(err) + } + var apiCard scraper.APICard + err = json.Unmarshal(data, &apiCard) + if err != nil { + t.Fatal(err.Error()) + } + url := scraper.ParseAPIUrlCard(apiCard) + + expected_domain := "www.youtube.com" + if url.Domain != expected_domain { + t.Errorf("Expected %q, got %q", expected_domain, url.Domain) + } + expected_title := "Did Michael Malice Turn Me into an Anarchist? | Ep 181" + if url.Title != expected_title { + t.Errorf("Expected %q, got %q", expected_title, url.Title) + } + expected_description := "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..." + if url.Description != expected_description { + t.Errorf("Expected %q, got %q", expected_description, url.Description) + } + expected_remote_url := "https://pbs.twimg.com/cards/player-placeholder.png" + if url.ThumbnailRemoteUrl != expected_remote_url { + t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) + } + expected_local_filename := "player-placeholder.png" + if url.ThumbnailLocalPath != expected_local_filename { + t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath) + } + expected_site_id := scraper.UserID(10228272) + if url.SiteID != expected_site_id { + t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID) + } + if !url.HasThumbnail { + t.Errorf("Should have a thumbnail, but it doesn't") + } + if url.IsContentDownloaded { + t.Errorf("Expected it not to be downloaded, but it was") + } +} + func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) { data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") if err != nil {