Add dimension info to media content

This commit is contained in:
Alessio 2021-10-10 16:06:47 -07:00
parent dd22b25831
commit 4fa30e246d
10 changed files with 73 additions and 15 deletions

View File

@ -12,12 +12,12 @@ import (
*/ */
func (p Profile) SaveImage(img scraper.Image) error { func (p Profile) SaveImage(img scraper.Image) error {
_, err := p.DB.Exec(` _, err := p.DB.Exec(`
insert into images (id, tweet_id, remote_url, local_filename, is_downloaded) insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
values (?, ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?)
on conflict do update on conflict do update
set is_downloaded=(is_downloaded or ?) set is_downloaded=(is_downloaded or ?)
`, `,
img.ID, img.TweetID, img.RemoteURL, img.LocalFilename, img.IsDownloaded, img.ID, img.TweetID, img.Width, img.Height, img.RemoteURL, img.LocalFilename, img.IsDownloaded,
img.IsDownloaded, img.IsDownloaded,
) )
return err return err
@ -31,12 +31,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
*/ */
func (p Profile) SaveVideo(vid scraper.Video) error { func (p Profile) SaveVideo(vid scraper.Video) error {
_, err := p.DB.Exec(` _, err := p.DB.Exec(`
insert into videos (id, tweet_id, remote_url, local_filename, is_downloaded, is_gif) insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update on conflict do update
set is_downloaded=(is_downloaded or ?) set is_downloaded=(is_downloaded or ?)
`, `,
vid.ID, vid.TweetID, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif, vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
vid.IsDownloaded, vid.IsDownloaded,
) )
return err return err
@ -47,12 +47,12 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
*/ */
func (p Profile) SaveUrl(url scraper.Url) error { func (p Profile) SaveUrl(url scraper.Url) error {
_, err := p.DB.Exec(` _, err := p.DB.Exec(`
insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update on conflict do update
set is_content_downloaded=(is_content_downloaded or ?) set is_content_downloaded=(is_content_downloaded or ?)
`, `,
url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded,
url.IsContentDownloaded, url.IsContentDownloaded,
) )
return err return err
@ -62,7 +62,7 @@ func (p Profile) SaveUrl(url scraper.Url) error {
* Get the list of images for a tweet * Get the list of images for a tweet
*/ */
func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) { func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded from images where tweet_id=?") stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?")
if err != nil { if err != nil {
return return
} }
@ -74,7 +74,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
var img scraper.Image var img scraper.Image
for rows.Next() { for rows.Next() {
err = rows.Scan(&img.ID, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded) err = rows.Scan(&img.ID, &img.Width, &img.Height, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded)
if err != nil { if err != nil {
return return
} }
@ -89,7 +89,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet * Get the list of videos for a tweet
*/ */
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?") stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
if err != nil { if err != nil {
return return
} }
@ -100,7 +100,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
} }
var vid scraper.Video var vid scraper.Video
for rows.Next() { for rows.Next() {
err = rows.Scan(&vid.ID, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif) err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
if err != nil { if err != nil {
return return
} }
@ -114,7 +114,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
* Get the list of Urls for a Tweet * Get the list of Urls for a Tweet
*/ */
func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) { func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid") stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid")
if err != nil { if err != nil {
return return
} }
@ -125,7 +125,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
} }
var url scraper.Url var url scraper.Url
for rows.Next() { for rows.Next() {
err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded) err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded)
if err != nil { if err != nil {
return return
} }

View File

@ -61,6 +61,8 @@ create table urls (rowid integer primary key,
description text, description text,
creator_id integer, creator_id integer,
site_id integer, site_id integer,
thumbnail_width integer not null,
thumbnail_height integer not null,
thumbnail_remote_url text, thumbnail_remote_url text,
thumbnail_local_path text, thumbnail_local_path text,
has_card boolean, has_card boolean,
@ -76,6 +78,8 @@ create table urls (rowid integer primary key,
create table images (rowid integer primary key, create table images (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'), id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null, tweet_id integer not null,
width integer not null,
height integer not null,
remote_url text not null unique, remote_url text not null unique,
local_filename text not null unique, local_filename text not null unique,
is_downloaded boolean default 0, is_downloaded boolean default 0,
@ -86,6 +90,8 @@ create table images (rowid integer primary key,
create table videos (rowid integer primary key, create table videos (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'), id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null, tweet_id integer not null,
width integer not null,
height integer not null,
remote_url text not null unique, remote_url text not null unique,
local_filename text not null unique, local_filename text not null unique,
is_gif boolean default 0, is_gif boolean default 0,

View File

@ -72,6 +72,8 @@ func create_image_from_id(id int) scraper.Image {
return scraper.Image{ return scraper.Image{
ID: scraper.ImageID(id), ID: scraper.ImageID(id),
TweetID: -1, TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename, RemoteURL: filename,
LocalFilename: filename, LocalFilename: filename,
IsDownloaded: false, IsDownloaded: false,
@ -86,6 +88,8 @@ func create_video_from_id(id int) scraper.Video {
return scraper.Video{ return scraper.Video{
ID: scraper.VideoID(id), ID: scraper.VideoID(id),
TweetID: -1, TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename, RemoteURL: filename,
LocalFilename: filename, LocalFilename: filename,
IsDownloaded: false, IsDownloaded: false,
@ -104,6 +108,8 @@ func create_url_from_id(id int) scraper.Url {
Text: s + "text", Text: s + "text",
Title: s + "title", Title: s + "title",
Description: s + "description", Description: s + "description",
ThumbnailWidth: id * 23,
ThumbnailHeight: id * 7,
ThumbnailRemoteUrl: s + "remote url", ThumbnailRemoteUrl: s + "remote url",
ThumbnailLocalPath: s + "local path", ThumbnailLocalPath: s + "local path",
CreatorID: scraper.UserID(id), CreatorID: scraper.UserID(id),

View File

@ -13,6 +13,10 @@ type APIMedia struct {
MediaURLHttps string `json:"media_url_https"` MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"` Type string `json:"type"`
URL string `json:"url"` URL string `json:"url"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
} }
type SortableVariants []struct { type SortableVariants []struct {
@ -30,6 +34,10 @@ type APIExtendedMedia struct {
VideoInfo struct { VideoInfo struct {
Variants SortableVariants `json:"variants"` Variants SortableVariants `json:"variants"`
} `json:"video_info"` } `json:"video_info"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
} }
type APICard struct { type APICard struct {
@ -58,6 +66,8 @@ type APICard struct {
Thumbnail struct { Thumbnail struct {
ImageValue struct { ImageValue struct {
Url string `json:"url"` Url string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"image_value"` } `json:"image_value"`
} `json:"thumbnail_image_large"` } `json:"thumbnail_image_large"`
PlayerImage struct { PlayerImage struct {

View File

@ -9,6 +9,8 @@ type ImageID int64
type Image struct { type Image struct {
ID ImageID ID ImageID
TweetID TweetID TweetID TweetID
Width int
Height int
RemoteURL string RemoteURL string
LocalFilename string LocalFilename string
IsDownloaded bool IsDownloaded bool
@ -19,6 +21,8 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
return Image{ return Image{
ID: ImageID(apiMedia.ID), ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps, RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename, LocalFilename: local_filename,
IsDownloaded: false, IsDownloaded: false,
} }

View File

@ -28,6 +28,14 @@ func TestParseAPIMedia(t *testing.T) {
if image.RemoteURL != expected_remote_url { if image.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL) t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL)
} }
expected_width := 593
if image.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, image.Width)
}
expected_height := 239
if image.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, image.Height)
}
expected_local_filename := "E18sEUrWYAk8dBl.jpg" expected_local_filename := "E18sEUrWYAk8dBl.jpg"
if image.LocalFilename != expected_local_filename { if image.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename) t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename)

View File

@ -13,6 +13,8 @@ type Url struct {
Text string Text string
Title string Title string
Description string Description string
ThumbnailWidth int
ThumbnailHeight int
ThumbnailRemoteUrl string ThumbnailRemoteUrl string
ThumbnailLocalPath string ThumbnailLocalPath string
CreatorID UserID CreatorID UserID
@ -49,6 +51,8 @@ func ParseAPIUrlCard(apiCard APICard) Url {
ret.HasThumbnail = true ret.HasThumbnail = true
ret.ThumbnailRemoteUrl = thumbnail_url ret.ThumbnailRemoteUrl = thumbnail_url
ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url) ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
} }
return ret return ret

View File

@ -32,6 +32,14 @@ func TestParseAPIUrlCard(t *testing.T) {
if url.Description != expected_description { if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description) t.Errorf("Expected %q, got %q", expected_description, url.Description)
} }
expected_width := 600
if url.ThumbnailWidth != expected_width {
t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth)
}
expected_height := 315
if url.ThumbnailHeight != expected_height {
t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600" expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
if url.ThumbnailRemoteUrl != expected_remote_url { if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)

View File

@ -13,6 +13,8 @@ type VideoID int64
type Video struct { type Video struct {
ID VideoID ID VideoID
TweetID TweetID TweetID TweetID
Width int
Height int
RemoteURL string RemoteURL string
LocalFilename string LocalFilename string
IsDownloaded bool IsDownloaded bool
@ -28,6 +30,8 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
return Video{ return Video{
ID: VideoID(apiVideo.ID), ID: VideoID(apiVideo.ID),
TweetID: tweet_id, TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL, RemoteURL: variants[0].URL,
LocalFilename: local_filename, LocalFilename: local_filename,
IsDownloaded: false, IsDownloaded: false,

View File

@ -32,6 +32,14 @@ func TestParseAPIVideo(t *testing.T) {
if video.RemoteURL != expected_remote_url { if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL) t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
} }
expected_height := 1280
if video.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
}
expected_width := 720
if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
}
expected_local_filename := "28.mp4" expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename { if video.LocalFilename != expected_local_filename {