Add width and height (dimension info) to images, videos, and URL thumbnails

This commit is contained in:
Alessio 2021-10-10 16:06:47 -07:00
parent dd22b25831
commit 4fa30e246d
10 changed files with 73 additions and 15 deletions

View File

@ -12,12 +12,12 @@ import (
*/
func (p Profile) SaveImage(img scraper.Image) error {
_, err := p.DB.Exec(`
insert into images (id, tweet_id, remote_url, local_filename, is_downloaded)
values (?, ?, ?, ?, ?)
insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
values (?, ?, ?, ?, ?, ?, ?)
on conflict do update
set is_downloaded=(is_downloaded or ?)
`,
img.ID, img.TweetID, img.RemoteURL, img.LocalFilename, img.IsDownloaded,
img.ID, img.TweetID, img.Width, img.Height, img.RemoteURL, img.LocalFilename, img.IsDownloaded,
img.IsDownloaded,
)
return err
@ -31,12 +31,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
*/
func (p Profile) SaveVideo(vid scraper.Video) error {
_, err := p.DB.Exec(`
insert into videos (id, tweet_id, remote_url, local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?)
insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update
set is_downloaded=(is_downloaded or ?)
`,
vid.ID, vid.TweetID, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
vid.IsDownloaded,
)
return err
@ -47,12 +47,12 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
*/
func (p Profile) SaveUrl(url scraper.Url) error {
_, err := p.DB.Exec(`
insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update
set is_content_downloaded=(is_content_downloaded or ?)
`,
url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded,
url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded,
url.IsContentDownloaded,
)
return err
@ -62,7 +62,7 @@ func (p Profile) SaveUrl(url scraper.Url) error {
* Get the list of images for a tweet
*/
func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded from images where tweet_id=?")
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?")
if err != nil {
return
}
@ -74,7 +74,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
var img scraper.Image
for rows.Next() {
err = rows.Scan(&img.ID, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded)
err = rows.Scan(&img.ID, &img.Width, &img.Height, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded)
if err != nil {
return
}
@ -89,7 +89,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet
*/
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select id, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
if err != nil {
return
}
@ -100,7 +100,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
}
var vid scraper.Video
for rows.Next() {
err = rows.Scan(&vid.ID, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
if err != nil {
return
}
@ -114,7 +114,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
* Get the list of Urls for a Tweet
*/
func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid")
stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid")
if err != nil {
return
}
@ -125,7 +125,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
}
var url scraper.Url
for rows.Next() {
err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded)
err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded)
if err != nil {
return
}

View File

@ -61,6 +61,8 @@ create table urls (rowid integer primary key,
description text,
creator_id integer,
site_id integer,
thumbnail_width integer not null,
thumbnail_height integer not null,
thumbnail_remote_url text,
thumbnail_local_path text,
has_card boolean,
@ -76,6 +78,8 @@ create table urls (rowid integer primary key,
create table images (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,
width integer not null,
height integer not null,
remote_url text not null unique,
local_filename text not null unique,
is_downloaded boolean default 0,
@ -86,6 +90,8 @@ create table images (rowid integer primary key,
create table videos (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,
width integer not null,
height integer not null,
remote_url text not null unique,
local_filename text not null unique,
is_gif boolean default 0,

View File

@ -72,6 +72,8 @@ func create_image_from_id(id int) scraper.Image {
return scraper.Image{
ID: scraper.ImageID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
IsDownloaded: false,
@ -86,6 +88,8 @@ func create_video_from_id(id int) scraper.Video {
return scraper.Video{
ID: scraper.VideoID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
IsDownloaded: false,
@ -104,6 +108,8 @@ func create_url_from_id(id int) scraper.Url {
Text: s + "text",
Title: s + "title",
Description: s + "description",
ThumbnailWidth: id * 23,
ThumbnailHeight: id * 7,
ThumbnailRemoteUrl: s + "remote url",
ThumbnailLocalPath: s + "local path",
CreatorID: scraper.UserID(id),

View File

@ -13,6 +13,10 @@ type APIMedia struct {
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
URL string `json:"url"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
}
type SortableVariants []struct {
@ -30,6 +34,10 @@ type APIExtendedMedia struct {
VideoInfo struct {
Variants SortableVariants `json:"variants"`
} `json:"video_info"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
}
type APICard struct {
@ -58,6 +66,8 @@ type APICard struct {
Thumbnail struct {
ImageValue struct {
Url string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"image_value"`
} `json:"thumbnail_image_large"`
PlayerImage struct {

View File

@ -9,6 +9,8 @@ type ImageID int64
type Image struct {
ID ImageID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
IsDownloaded bool
@ -19,6 +21,8 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
return Image{
ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename,
IsDownloaded: false,
}

View File

@ -28,6 +28,14 @@ func TestParseAPIMedia(t *testing.T) {
if image.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL)
}
expected_width := 593
if image.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, image.Width)
}
expected_height := 239
if image.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, image.Height)
}
expected_local_filename := "E18sEUrWYAk8dBl.jpg"
if image.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename)

View File

@ -13,6 +13,8 @@ type Url struct {
Text string
Title string
Description string
ThumbnailWidth int
ThumbnailHeight int
ThumbnailRemoteUrl string
ThumbnailLocalPath string
CreatorID UserID
@ -49,6 +51,8 @@ func ParseAPIUrlCard(apiCard APICard) Url {
ret.HasThumbnail = true
ret.ThumbnailRemoteUrl = thumbnail_url
ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
}
return ret

View File

@ -32,6 +32,14 @@ func TestParseAPIUrlCard(t *testing.T) {
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_width := 600
if url.ThumbnailWidth != expected_width {
t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth)
}
expected_height := 315
if url.ThumbnailHeight != expected_height {
t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)

View File

@ -13,6 +13,8 @@ type VideoID int64
type Video struct {
ID VideoID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
IsDownloaded bool
@ -28,6 +30,8 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
return Video{
ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
IsDownloaded: false,

View File

@ -32,6 +32,14 @@ func TestParseAPIVideo(t *testing.T) {
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_height := 1280
if video.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
}
expected_width := 720
if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
}
expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename {