From 4fa30e246d876a6598d2c49c88e86c0a15d3fbe9 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 10 Oct 2021 16:06:47 -0700 Subject: [PATCH] Add dimension info to media content --- persistence/media_queries.go | 30 +++++++++++++++--------------- persistence/schema.sql | 6 ++++++ persistence/utils_test.go | 6 ++++++ scraper/api_types.go | 10 ++++++++++ scraper/image.go | 4 ++++ scraper/image_test.go | 8 ++++++++ scraper/url.go | 4 ++++ scraper/url_test.go | 8 ++++++++ scraper/video.go | 4 ++++ scraper/video_test.go | 8 ++++++++ 10 files changed, 73 insertions(+), 15 deletions(-) diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 42ef949..7947ee7 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -12,12 +12,12 @@ import ( */ func (p Profile) SaveImage(img scraper.Image) error { _, err := p.DB.Exec(` - insert into images (id, tweet_id, remote_url, local_filename, is_downloaded) - values (?, ?, ?, ?, ?) + insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded) + values (?, ?, ?, ?, ?, ?, ?) on conflict do update set is_downloaded=(is_downloaded or ?) `, - img.ID, img.TweetID, img.RemoteURL, img.LocalFilename, img.IsDownloaded, + img.ID, img.TweetID, img.Width, img.Height, img.RemoteURL, img.LocalFilename, img.IsDownloaded, img.IsDownloaded, ) return err @@ -31,12 +31,12 @@ func (p Profile) SaveImage(img scraper.Image) error { */ func (p Profile) SaveVideo(vid scraper.Video) error { _, err := p.DB.Exec(` - insert into videos (id, tweet_id, remote_url, local_filename, is_downloaded, is_gif) - values (?, ?, ?, ?, ?, ?) + insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif) + values (?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set is_downloaded=(is_downloaded or ?) `, - vid.ID, vid.TweetID, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif, + vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif, vid.IsDownloaded, ) return err @@ -47,12 +47,12 @@ func (p Profile) SaveVideo(vid scraper.Video) error { */ func (p Profile) SaveUrl(url scraper.Url) error { _, err := p.DB.Exec(` - insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) - values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set is_content_downloaded=(is_content_downloaded or ?) `, - url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, + url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, url.IsContentDownloaded, ) return err @@ -62,7 +62,7 @@ func (p Profile) SaveUrl(url scraper.Url) error { * Get the list of images for a tweet */ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) { - stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded from images where tweet_id=?") + stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?") if err != nil { return } @@ -74,7 +74,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e var img scraper.Image for rows.Next() { - err = rows.Scan(&img.ID, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded) + err = rows.Scan(&img.ID, &img.Width, &img.Height, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded) if err != nil { return } @@ -89,7 +89,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e * Get the list of videos for a tweet */ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { - stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?") + stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?") if err != nil { return } @@ -100,7 +100,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e } var vid scraper.Video for rows.Next() { - err = rows.Scan(&vid.ID, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif) + err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif) if err != nil { return } @@ -114,7 +114,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e * Get the list of Urls for a Tweet */ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) { - stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid") + stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid") if err != nil { return } @@ -125,7 +125,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error } var url scraper.Url for rows.Next() { - err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded) + err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded) if err != nil { return } diff --git a/persistence/schema.sql b/persistence/schema.sql index 67acd99..13592ce 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -61,6 +61,8 @@ create table urls (rowid integer primary key, description text, creator_id integer, site_id integer, + thumbnail_width integer not null, + thumbnail_height integer not null, thumbnail_remote_url text, thumbnail_local_path text, has_card boolean, @@ -76,6 +78,8 @@ create table urls (rowid integer primary key, create table images (rowid integer primary key, id integer unique not null check(typeof(id) = 'integer'), tweet_id integer not null, + width integer not null, + height integer not null, remote_url text not null unique, local_filename text not null unique, is_downloaded boolean default 0, @@ -86,6 +90,8 @@ create table images (rowid integer primary key, create table videos (rowid integer primary key, id integer unique not null check(typeof(id) = 'integer'), tweet_id integer not null, + width integer not null, + height integer not null, remote_url text not null unique, local_filename text not null unique, is_gif boolean default 0, diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 1aebf4c..02bce59 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -72,6 +72,8 @@ func create_image_from_id(id int) scraper.Image { return scraper.Image{ ID: scraper.ImageID(id), TweetID: -1, + Width: id * 10, + Height: id * 5, RemoteURL: filename, LocalFilename: filename, IsDownloaded: false, @@ -86,6 +88,8 @@ func create_video_from_id(id int) scraper.Video { return scraper.Video{ ID: scraper.VideoID(id), TweetID: -1, + Width: id * 10, + Height: id * 5, RemoteURL: filename, LocalFilename: filename, IsDownloaded: false, @@ -104,6 +108,8 @@ func create_url_from_id(id int) scraper.Url { Text: s + "text", Title: s + "title", Description: s + "description", + ThumbnailWidth: id * 23, + ThumbnailHeight: id * 7, ThumbnailRemoteUrl: s + "remote url", ThumbnailLocalPath: s + "local path", CreatorID: scraper.UserID(id), diff --git a/scraper/api_types.go b/scraper/api_types.go index 57cb80b..dda8726 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -13,6 +13,10 @@ type APIMedia struct { MediaURLHttps string `json:"media_url_https"` Type string `json:"type"` URL string `json:"url"` + OriginalInfo struct { + Width int `json:"width"` + Height int `json:"height"` + } `json:"original_info"` } type SortableVariants []struct { @@ -30,6 +34,10 @@ type APIExtendedMedia struct { VideoInfo struct { Variants SortableVariants `json:"variants"` } `json:"video_info"` + OriginalInfo struct { + Width int `json:"width"` + Height int `json:"height"` + } `json:"original_info"` } type APICard struct { @@ -58,6 +66,8 @@ type APICard struct { Thumbnail struct { ImageValue struct { Url string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` } `json:"image_value"` } `json:"thumbnail_image_large"` PlayerImage struct { diff --git a/scraper/image.go b/scraper/image.go index 4b67ac8..c7ed727 100644 --- a/scraper/image.go +++ b/scraper/image.go @@ -9,6 +9,8 @@ type ImageID int64 type Image struct { ID ImageID TweetID TweetID + Width int + Height int RemoteURL string LocalFilename string IsDownloaded bool @@ -19,6 +21,8 @@ func ParseAPIMedia(apiMedia APIMedia) Image { return Image{ ID: ImageID(apiMedia.ID), RemoteURL: apiMedia.MediaURLHttps, + Width: apiMedia.OriginalInfo.Width, + Height: apiMedia.OriginalInfo.Height, LocalFilename: local_filename, IsDownloaded: false, } diff --git a/scraper/image_test.go b/scraper/image_test.go index e9015df..216ccdb 100644 --- a/scraper/image_test.go +++ b/scraper/image_test.go @@ -28,6 +28,14 @@ func TestParseAPIMedia(t *testing.T) { if image.RemoteURL != expected_remote_url { t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL) } + expected_width := 593 + if image.Width != expected_width { + t.Errorf("Expected width %d, got %d", expected_width, image.Width) + } + expected_height := 239 + if image.Height != expected_height { + t.Errorf("Expected height %d, got %d", expected_height, image.Height) + } expected_local_filename := "E18sEUrWYAk8dBl.jpg" if image.LocalFilename != expected_local_filename { t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename) diff --git a/scraper/url.go b/scraper/url.go index 733cce3..49e543b 100644 --- a/scraper/url.go +++ b/scraper/url.go @@ -13,6 +13,8 @@ type Url struct { Text string Title string Description string + ThumbnailWidth int + ThumbnailHeight int ThumbnailRemoteUrl string ThumbnailLocalPath string CreatorID UserID @@ -49,6 +51,8 @@ func ParseAPIUrlCard(apiCard APICard) Url { ret.HasThumbnail = true ret.ThumbnailRemoteUrl = thumbnail_url ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url) + ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width + ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height } return ret diff --git a/scraper/url_test.go b/scraper/url_test.go index 5f49596..5ea9b08 100644 --- a/scraper/url_test.go +++ b/scraper/url_test.go @@ -32,6 +32,14 @@ func TestParseAPIUrlCard(t *testing.T) { if url.Description != expected_description { t.Errorf("Expected %q, got %q", expected_description, url.Description) } + expected_width := 600 + if url.ThumbnailWidth != expected_width { + t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth) + } + expected_height := 315 + if url.ThumbnailHeight != expected_height { + t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight) + } expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600" if url.ThumbnailRemoteUrl != expected_remote_url { t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) diff --git a/scraper/video.go b/scraper/video.go index f198688..6fa379f 100644 --- a/scraper/video.go +++ b/scraper/video.go @@ -13,6 +13,8 @@ type VideoID int64 type Video struct { ID VideoID TweetID TweetID + Width int + Height int RemoteURL string LocalFilename string IsDownloaded bool @@ -28,6 +30,8 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { return Video{ ID: VideoID(apiVideo.ID), TweetID: tweet_id, + Width: apiVideo.OriginalInfo.Width, + Height: apiVideo.OriginalInfo.Height, RemoteURL: variants[0].URL, LocalFilename: local_filename, IsDownloaded: false, diff --git a/scraper/video_test.go b/scraper/video_test.go index 7716fab..78fc724 100644 --- a/scraper/video_test.go +++ b/scraper/video_test.go @@ -32,6 +32,14 @@ func TestParseAPIVideo(t *testing.T) { if video.RemoteURL != expected_remote_url { t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL) } + expected_height := 1280 + if video.Height != expected_height { + t.Errorf("Expected height %d, got %d", expected_height, video.Height) + } + expected_width := 720 + if video.Width != expected_width { + t.Errorf("Expected width %d, got %d", expected_width, video.Width) + } expected_local_filename := "28.mp4" if video.LocalFilename != expected_local_filename {