Add video thumbnails

This commit is contained in:
Alessio 2021-12-23 15:12:01 -05:00
parent 692d3de564
commit fbbb6834de
10 changed files with 58 additions and 15 deletions

View File

@ -63,11 +63,13 @@ test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 14189716
# Before the content download: no video for this tweet is flagged as downloaded,
# and the tweet itself is not marked as having its content downloaded.
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
# `find DIR` prints DIR itself, so a line count of 1 means the directory is empty.
test $(find videos | wc -l) = "1"
test $(find video_thumbnails | wc -l) = "1"
# Run the content download for the tweet.
tw download_tweet_content 1418971605674467340
# After the download: exactly one video row for the tweet, and it is flagged as downloaded.
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
# A line count of 2 means each directory now holds exactly one entry besides itself.
test $(find videos | wc -l) = "2"
test $(find video_thumbnails | wc -l) = "2"
# Try to double-download it
tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340

View File

@ -61,14 +61,23 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
/**
* Downloads an Video, and if successful, marks it as downloaded in the DB
* Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
*/
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
	// Fetch the video file into the profile's `videos` directory.
	video_path := path.Join(p.ProfileDir, "videos", v.LocalFilename)
	if err := downloader.Curl(v.RemoteURL, video_path); err != nil {
		return err
	}

	// Fetch the thumbnail into the profile's `video_thumbnails` directory.
	thumbnail_path := path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
	if err := downloader.Curl(v.ThumbnailRemoteUrl, thumbnail_path); err != nil {
		return err
	}

	// Both downloads succeeded: flag the video (through the caller's pointer)
	// and persist the updated record.
	v.IsDownloaded = true
	return p.SaveVideo(*v)
}

View File

@ -33,12 +33,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
*/
func (p Profile) SaveVideo(vid scraper.Video) error {
_, err := p.DB.Exec(`
insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?, ?, ?)
insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update
set is_downloaded=(is_downloaded or ?)
`,
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif,
vid.IsDownloaded,
)
return err
@ -112,7 +112,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet
*/
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?")
if err != nil {
return
}
@ -123,7 +123,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
}
var vid scraper.Video
for rows.Next() {
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif)
if err != nil {
return
}

View File

@ -64,6 +64,7 @@ func NewProfile(target_dir string) (Profile, error) {
link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
images_dir := path.Join(target_dir, "images")
videos_dir := path.Join(target_dir, "videos")
video_thumbnails_dir := path.Join(target_dir, "video_thumbnails")
// Create the directory
fmt.Printf("Creating new profile: %s\n", target_dir)
@ -131,6 +132,13 @@ func NewProfile(target_dir string) (Profile, error) {
return Profile{}, err
}
// Create `video_thumbnails`
fmt.Printf("Creating............. %s/\n", video_thumbnails_dir)
err = os.Mkdir(video_thumbnails_dir, os.FileMode(0755))
if err != nil {
return Profile{}, err
}
return Profile{target_dir, UsersList{}, settings, db}, nil
}

View File

@ -83,8 +83,8 @@ func TestNewProfile(t *testing.T) {
if err != nil {
panic(err)
}
if len(contents) != 7 {
t.Fatalf("Expected 7 contents, got %d instead", len(contents))
if len(contents) != 8 {
t.Fatalf("Expected 8 contents, got %d instead", len(contents))
}
expected_files := []struct {
@ -97,6 +97,7 @@ func TestNewProfile(t *testing.T) {
{"settings.yaml", false},
{"twitter.db", false},
{"users.yaml", false},
{"video_thumbnails", true},
{"videos", true},
}

View File

@ -126,6 +126,8 @@ create table videos (rowid integer primary key,
height integer not null,
remote_url text not null unique,
local_filename text not null unique,
thumbnail_remote_url text not null unique default "missing",
thumbnail_local_filename text not null unique default "missing",
is_gif boolean default 0,
is_downloaded boolean default 0,

View File

@ -92,6 +92,8 @@ func create_video_from_id(id int) scraper.Video {
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
ThumbnailRemoteUrl: filename,
ThumbnailLocalPath: filename,
IsDownloaded: false,
IsGif: false,
}

View File

@ -8,7 +8,7 @@ import (
)
const ENGINE_DATABASE_VERSION = 3
const ENGINE_DATABASE_VERSION = 4
type VersionMismatchError struct {
@ -54,6 +54,8 @@ var MIGRATIONS = []string{
alter table tweets add column last_scraped_at integer not null default 0`,
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
`alter table videos add column thumbnail_remote_url text not null unique default "missing";
alter table videos add column thumbnail_local_filename text not null unique default "missing"`,
}
/**

View File

@ -3,6 +3,7 @@ package scraper
import (
"fmt"
"sort"
"path"
)
type VideoID int64
@ -17,6 +18,10 @@ type Video struct {
Height int
RemoteURL string
LocalFilename string
ThumbnailRemoteUrl string
ThumbnailLocalPath string
IsDownloaded bool
IsGif bool
}
@ -34,6 +39,10 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
IsDownloaded: false,
IsGif: apiVideo.Type == "animated_gif",
}

View File

@ -28,10 +28,7 @@ func TestParseAPIVideo(t *testing.T) {
if video.TweetID != tweet_id {
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
}
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_height := 1280
if video.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
@ -40,11 +37,22 @@ func TestParseAPIVideo(t *testing.T) {
if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
}
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
}
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
}
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailLocalPath != expected_thumbnail_filename {
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
}
if video.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}