Add video thumbnails

This commit is contained in:
Alessio 2021-12-23 15:12:01 -05:00
parent 692d3de564
commit fbbb6834de
10 changed files with 58 additions and 15 deletions

View File

@ -63,11 +63,13 @@ test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 14189716
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0" test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
test $(find videos | wc -l) = "1" test $(find videos | wc -l) = "1"
test $(find video_thumbnails | wc -l) = "1"
tw download_tweet_content 1418971605674467340 tw download_tweet_content 1418971605674467340
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0" test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1" test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
test $(find videos | wc -l) = "2" test $(find videos | wc -l) = "2"
test $(find video_thumbnails | wc -l) = "2"
# Try to double-download it # Try to double-download it
tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340 tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340

View File

@ -61,14 +61,23 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
/** /**
* Downloads an Video, and if successful, marks it as downloaded in the DB * Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
*/ */
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error { func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
// Download the video to <ProfileDir>/videos/<LocalFilename>; any error aborts immediately
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename) outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
err := downloader.Curl(v.RemoteURL, outfile) err := downloader.Curl(v.RemoteURL, outfile)
if err != nil { if err != nil {
return err return err
} }
// Download the thumbnail to <ProfileDir>/video_thumbnails/<ThumbnailLocalPath>.
// NOTE(review): an error here returns before IsDownloaded is set, so a video
// fetched successfully above is still not recorded as downloaded.
outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
if err != nil {
return err
}
// Both downloads succeeded: set the flag on the caller's Video, then pass a copy to SaveVideo
v.IsDownloaded = true v.IsDownloaded = true
return p.SaveVideo(*v) return p.SaveVideo(*v)
} }

View File

@ -33,12 +33,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
*/ */
func (p Profile) SaveVideo(vid scraper.Video) error { func (p Profile) SaveVideo(vid scraper.Video) error {
_, err := p.DB.Exec(` _, err := p.DB.Exec(`
insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif) insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update on conflict do update
set is_downloaded=(is_downloaded or ?) set is_downloaded=(is_downloaded or ?)
`, `,
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif, vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif,
vid.IsDownloaded, vid.IsDownloaded,
) )
return err return err
@ -112,7 +112,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet * Get the list of videos for a tweet
*/ */
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?") stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?")
if err != nil { if err != nil {
return return
} }
@ -123,7 +123,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
} }
var vid scraper.Video var vid scraper.Video
for rows.Next() { for rows.Next() {
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif) err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif)
if err != nil { if err != nil {
return return
} }

View File

@ -64,6 +64,7 @@ func NewProfile(target_dir string) (Profile, error) {
link_thumbnails_dir := path.Join(target_dir, "link_preview_images") link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
images_dir := path.Join(target_dir, "images") images_dir := path.Join(target_dir, "images")
videos_dir := path.Join(target_dir, "videos") videos_dir := path.Join(target_dir, "videos")
video_thumbnails_dir := path.Join(target_dir, "video_thumbnails")
// Create the directory // Create the directory
fmt.Printf("Creating new profile: %s\n", target_dir) fmt.Printf("Creating new profile: %s\n", target_dir)
@ -131,6 +132,13 @@ func NewProfile(target_dir string) (Profile, error) {
return Profile{}, err return Profile{}, err
} }
// Create `video_thumbnails`
fmt.Printf("Creating............. %s/\n", video_thumbnails_dir)
err = os.Mkdir(video_thumbnails_dir, os.FileMode(0755))
if err != nil {
return Profile{}, err
}
return Profile{target_dir, UsersList{}, settings, db}, nil return Profile{target_dir, UsersList{}, settings, db}, nil
} }

View File

@ -83,8 +83,8 @@ func TestNewProfile(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
if len(contents) != 7 { if len(contents) != 8 {
t.Fatalf("Expected 7 contents, got %d instead", len(contents)) t.Fatalf("Expected 8 contents, got %d instead", len(contents))
} }
expected_files := []struct { expected_files := []struct {
@ -97,6 +97,7 @@ func TestNewProfile(t *testing.T) {
{"settings.yaml", false}, {"settings.yaml", false},
{"twitter.db", false}, {"twitter.db", false},
{"users.yaml", false}, {"users.yaml", false},
{"video_thumbnails", true},
{"videos", true}, {"videos", true},
} }

View File

@ -126,6 +126,8 @@ create table videos (rowid integer primary key,
height integer not null, height integer not null,
remote_url text not null unique, remote_url text not null unique,
local_filename text not null unique, local_filename text not null unique,
thumbnail_remote_url text not null unique default "missing",
thumbnail_local_filename text not null unique default "missing",
is_gif boolean default 0, is_gif boolean default 0,
is_downloaded boolean default 0, is_downloaded boolean default 0,

View File

@ -92,6 +92,8 @@ func create_video_from_id(id int) scraper.Video {
Height: id * 5, Height: id * 5,
RemoteURL: filename, RemoteURL: filename,
LocalFilename: filename, LocalFilename: filename,
ThumbnailRemoteUrl: filename,
ThumbnailLocalPath: filename,
IsDownloaded: false, IsDownloaded: false,
IsGif: false, IsGif: false,
} }

View File

@ -8,7 +8,7 @@ import (
) )
const ENGINE_DATABASE_VERSION = 3 const ENGINE_DATABASE_VERSION = 4
type VersionMismatchError struct { type VersionMismatchError struct {
@ -54,6 +54,8 @@ var MIGRATIONS = []string{
alter table tweets add column last_scraped_at integer not null default 0`, alter table tweets add column last_scraped_at integer not null default 0`,
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2; `update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
`alter table videos add column thumbnail_remote_url text not null unique default "missing";
alter table videos add column thumbnail_local_filename text not null unique default "missing"`,
} }
/** /**

View File

@ -3,6 +3,7 @@ package scraper
import ( import (
"fmt" "fmt"
"sort" "sort"
"path"
) )
type VideoID int64 type VideoID int64
@ -17,6 +18,10 @@ type Video struct {
Height int Height int
RemoteURL string RemoteURL string
LocalFilename string LocalFilename string
ThumbnailRemoteUrl string
ThumbnailLocalPath string
IsDownloaded bool IsDownloaded bool
IsGif bool IsGif bool
} }
@ -34,6 +39,10 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
Height: apiVideo.OriginalInfo.Height, Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL, RemoteURL: variants[0].URL,
LocalFilename: local_filename, LocalFilename: local_filename,
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
IsDownloaded: false, IsDownloaded: false,
IsGif: apiVideo.Type == "animated_gif", IsGif: apiVideo.Type == "animated_gif",
} }

View File

@ -28,10 +28,7 @@ func TestParseAPIVideo(t *testing.T) {
if video.TweetID != tweet_id { if video.TweetID != tweet_id {
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID) t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
} }
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_height := 1280 expected_height := 1280
if video.Height != expected_height { if video.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, video.Height) t.Errorf("Expected height %d, got %d", expected_height, video.Height)
@ -40,11 +37,22 @@ func TestParseAPIVideo(t *testing.T) {
if video.Width != expected_width { if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width) t.Errorf("Expected width %d, got %d", expected_width, video.Width)
} }
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_local_filename := "28.mp4" expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename { if video.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename) t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
} }
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
}
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailLocalPath != expected_thumbnail_filename {
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
}
if video.IsDownloaded { if video.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was") t.Errorf("Expected it not to be downloaded, but it was")
} }