From e2ff9436fa20b3aadcd4a45d8151850ea194388c Mon Sep 17 00:00:00 2001
From: Alessio
Date: Fri, 17 Sep 2021 17:35:55 -0700
Subject: [PATCH] Add player type of URL card

---
Reviewer notes (this section is ignored by git am):
ParseAPIUrlCard selects the thumbnail URL with a switch and builds the Url
once instead of repeating the struct literal per card type.  The player test
reports a missing fixture through t.Fatal and also asserts HasCard.  The
post-image blob ids on the "index" lines of scraper/url.go and
scraper/url_test.go predate this revision.

 cmd/tests.sh              |  2 +-
 scraper/api_types.go      |  6 +++++
 scraper/api_types_test.go |  4 ++--
 scraper/url.go            | 24 ++++++++++++++++++--
 scraper/url_test.go       | 46 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/cmd/tests.sh b/cmd/tests.sh
index d449a63..4316f5e 100755
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -83,7 +83,7 @@ tw fetch_tweet https://twitter.com/RememberAfghan1/status/1429585423702052867
 test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429585423702052867") = "RememberAfghan1"
 test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429584239570391042") = "michaelmalice"
 test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429583672827465730") = "kanesays23"
-test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429587734054703113") = "NovaValentis"
+test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429616911315345414") = "NovaValentis"
 
 
 # Test that the `--profile` flag works
diff --git a/scraper/api_types.go b/scraper/api_types.go
index fb3afab..1a58706 100644
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -33,6 +33,7 @@ type APIExtendedMedia struct {
 }
 
 type APICard struct {
+	Name          string `json:"name"`
 	BindingValues struct {
 		Domain struct {
 			Value string `json:"string_value"`
@@ -58,6 +59,11 @@ type APICard struct {
 				Url string `json:"url"`
 			} `json:"image_value"`
 		} `json:"thumbnail_image_large"`
+		PlayerImage struct {
+			ImageValue struct {
+				Url string `json:"url"`
+			} `json:"image_value"`
+		} `json:"player_image_large"`
 	} `json:"binding_values"`
 }
 
diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go
index 03e80b2..683fe0c 100644
--- a/scraper/api_types_test.go
+++ b/scraper/api_types_test.go
@@ -45,10 +45,10 @@ func TestNormalizeContent(t *testing.T) {
 			t.Errorf("Expected quoted status %d, but got %d", v.quoted_status_id, tweet.QuotedStatusID)
 		}
 		if scraper.TweetID(tweet.InReplyToStatusID) != v.in_reply_to {
-			t.Errorf("Expected quoted status %d, but got %d", v.in_reply_to, tweet.InReplyToStatusID)
+			t.Errorf("Expected in_reply_to id %d, but got %d", v.in_reply_to, tweet.InReplyToStatusID)
 		}
 		if scraper.TweetID(tweet.RetweetedStatusID) != v.retweeted_status_id {
-			t.Errorf("Expected quoted status %d, but got %d", v.retweeted_status_id, tweet.RetweetedStatusID)
+			t.Errorf("Expected retweeted status id %d, but got %d", v.retweeted_status_id, tweet.RetweetedStatusID)
 		}
 	}
 }
diff --git a/scraper/url.go b/scraper/url.go
index c543bb0..de9ea07 100644
--- a/scraper/url.go
+++ b/scraper/url.go
@@ -21,15 +21,35 @@ type Url struct {
 
+// ParseAPIUrlCard converts an API card into a Url, copying the domain, title,
+// description, creator and site from the card's binding values.
+//
+// The thumbnail comes from whichever binding matches the card type: "summary"
+// and "summary_large_image" cards use the thumbnail image, "player" cards use
+// the player image.  It panics on any other card type.
 func ParseAPIUrlCard(apiCard APICard) Url {
 	values := apiCard.BindingValues
+
+	// The supported card types differ only in which binding holds the preview
+	// image, so pick that URL first and build the Url once.
+	var thumbnailUrl string
+	switch apiCard.Name {
+	case "summary_large_image", "summary":
+		thumbnailUrl = values.Thumbnail.ImageValue.Url
+	case "player":
+		thumbnailUrl = values.PlayerImage.ImageValue.Url
+	default:
+		panic("Unknown card type: " + apiCard.Name)
+	}
+
 	return Url{
 		Domain:              values.Domain.Value,
 		Title:               values.Title.Value,
 		Description:         values.Description.Value,
-		ThumbnailRemoteUrl:  values.Thumbnail.ImageValue.Url,
-		ThumbnailLocalPath:  get_thumbnail_local_path(values.Thumbnail.ImageValue.Url),
+		ThumbnailRemoteUrl:  thumbnailUrl,
+		ThumbnailLocalPath:  get_thumbnail_local_path(thumbnailUrl),
 		CreatorID:           UserID(values.Creator.UserValue.Value),
 		SiteID:              UserID(values.Site.UserValue.Value),
+		HasCard:             true,
 		IsContentDownloaded: false,
 	}
 }
 
diff --git a/scraper/url_test.go b/scraper/url_test.go
index 055dbb1..630ef73 100644
--- a/scraper/url_test.go
+++ b/scraper/url_test.go
@@ -52,3 +52,49 @@ func TestParseAPIUrlCard(t *testing.T) {
 		t.Errorf("Expected it not to be downloaded, but it was")
 	}
 }
+
+// TestParseAPIUrlCardWithPlayer parses the url_card_with_player.json fixture
+// and checks the fields of the resulting Url.
+func TestParseAPIUrlCardWithPlayer(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/url_card_with_player.json")
+	if err != nil {
+		t.Fatal(err.Error())
+	}
+	var apiCard scraper.APICard
+	err = json.Unmarshal(data, &apiCard)
+	if err != nil {
+		t.Fatal(err.Error())
+	}
+	url := scraper.ParseAPIUrlCard(apiCard)
+
+	expected_domain := "www.youtube.com"
+	if url.Domain != expected_domain {
+		t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
+	}
+	expected_title := "The Politically Incorrect Guide to the Constitution (Starring Tom..."
+	if url.Title != expected_title {
+		t.Errorf("Expected %q, got %q", expected_title, url.Title)
+	}
+	expected_description := "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..."
+	if url.Description != expected_description {
+		t.Errorf("Expected %q, got %q", expected_description, url.Description)
+	}
+	expected_remote_url := "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1"
+	if url.ThumbnailRemoteUrl != expected_remote_url {
+		t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
+	}
+	expected_local_filename := "_1t0btyt_800x320_1.jpg"
+	if url.ThumbnailLocalPath != expected_local_filename {
+		t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
+	}
+	expected_site_id := scraper.UserID(10228272)
+	if url.SiteID != expected_site_id {
+		t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
+	}
+	if !url.HasCard {
+		t.Errorf("Expected it to have a card, but it didn't")
+	}
+	if url.IsContentDownloaded {
+		t.Errorf("Expected it not to be downloaded, but it was")
+	}
+}