diff --git a/cmd/tests.sh b/cmd/tests.sh
index 33c69ba..fa48a17 100755
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -62,12 +62,14 @@ test $(sqlite3 twitter.db "select count(*) from videos") = "1"
 test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "1"
 test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
 test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
-test $(find videos| wc -l) = "1"
+test $(find videos | wc -l) = "1"
+test $(find video_thumbnails | wc -l) = "1"
 tw download_tweet_content 1418971605674467340
 test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
 test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
 test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
 test $(find videos | wc -l) = "2"
+test $(find video_thumbnails | wc -l) = "2"
 
 # Try to double-download it
 tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340
diff --git a/persistence/media_download.go b/persistence/media_download.go
index 88284a8..d55c075 100644
--- a/persistence/media_download.go
+++ b/persistence/media_download.go
@@ -61,14 +61,23 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
 
 
 /**
- * Downloads an Video, and if successful, marks it as downloaded in the DB
+ * Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
  */
 func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
+	// Download the video
 	outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
 	err := downloader.Curl(v.RemoteURL, outfile)
 	if err != nil {
 		return err
 	}
+
+	// Download the thumbnail
+	outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
+	err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
+	if err != nil {
+		return err
+	}
+
 	v.IsDownloaded = true
 	return p.SaveVideo(*v)
 }
diff --git a/persistence/media_queries.go b/persistence/media_queries.go
index d40f0b3..7369235 100644
--- a/persistence/media_queries.go
+++ b/persistence/media_queries.go
@@ -33,12 +33,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
  */
 func (p Profile) SaveVideo(vid scraper.Video) error {
 	_, err := p.DB.Exec(`
-		insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
-		values (?, ?, ?, ?, ?, ?, ?, ?)
+		insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif)
+		values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 		on conflict do update
 		set is_downloaded=(is_downloaded or ?)
 		`,
-		vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
+		vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif,
 		vid.IsDownloaded,
 	)
 	return err
@@ -112,7 +112,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
  * Get the list of videos for a tweet
  */
 func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
-	stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
+	stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?")
 	if err != nil {
 		return
 	}
@@ -123,7 +123,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
 	}
 	var vid scraper.Video
 	for rows.Next() {
-		err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
+		err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif)
 		if err != nil {
 			return
 		}
diff --git a/persistence/profile.go b/persistence/profile.go
index 65f9c1f..6410d7e 100644
--- a/persistence/profile.go
+++ b/persistence/profile.go
@@ -64,6 +64,7 @@ func NewProfile(target_dir string) (Profile, error) {
 	link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
 	images_dir := path.Join(target_dir, "images")
 	videos_dir := path.Join(target_dir, "videos")
+	video_thumbnails_dir := path.Join(target_dir, "video_thumbnails")
 
 	// Create the directory
 	fmt.Printf("Creating new profile: %s\n", target_dir)
@@ -131,6 +132,13 @@ func NewProfile(target_dir string) (Profile, error) {
 		return Profile{}, err
 	}
 
+	// Create `video_thumbnails`
+	fmt.Printf("Creating............. %s/\n", video_thumbnails_dir)
+	err = os.Mkdir(video_thumbnails_dir, os.FileMode(0755))
+	if err != nil {
+		return Profile{}, err
+	}
+
 	return Profile{target_dir, UsersList{}, settings, db}, nil
 }
 
diff --git a/persistence/profile_test.go b/persistence/profile_test.go
index d74943b..eaa5e0b 100644
--- a/persistence/profile_test.go
+++ b/persistence/profile_test.go
@@ -83,8 +83,8 @@ func TestNewProfile(t *testing.T) {
 	if err != nil {
 		panic(err)
 	}
-	if len(contents) != 7 {
-		t.Fatalf("Expected 7 contents, got %d instead", len(contents))
+	if len(contents) != 8 {
+		t.Fatalf("Expected 8 contents, got %d instead", len(contents))
 	}
 
 	expected_files := []struct {
@@ -97,6 +97,7 @@ func TestNewProfile(t *testing.T) {
 		{"settings.yaml", false},
 		{"twitter.db", false},
 		{"users.yaml", false},
+		{"video_thumbnails", true},
 		{"videos", true},
 	}
 
diff --git a/persistence/schema.sql b/persistence/schema.sql
index 5737b27..5582e5b 100644
--- a/persistence/schema.sql
+++ b/persistence/schema.sql
@@ -126,6 +126,8 @@ create table videos (rowid integer primary key,
     height integer not null,
     remote_url text not null unique,
     local_filename text not null unique,
+    thumbnail_remote_url text not null default 'missing',
+    thumbnail_local_filename text not null default 'missing',
     is_gif boolean default 0,
     is_downloaded boolean default 0,
 
diff --git a/persistence/utils_test.go b/persistence/utils_test.go
index 48bb5de..3dcce0f 100644
--- a/persistence/utils_test.go
+++ b/persistence/utils_test.go
@@ -92,6 +92,8 @@ func create_video_from_id(id int) scraper.Video {
 		Height: id * 5,
 		RemoteURL: filename,
 		LocalFilename: filename,
+		ThumbnailRemoteUrl: filename,
+		ThumbnailLocalPath: filename,
 		IsDownloaded: false,
 		IsGif: false,
 	}
diff --git a/persistence/versions.go b/persistence/versions.go
index e48a3b8..b2f3c3f 100644
--- a/persistence/versions.go
+++ b/persistence/versions.go
@@ -8,7 +8,7 @@ import (
 )
 
 
-const ENGINE_DATABASE_VERSION = 3
+const ENGINE_DATABASE_VERSION = 4
 
 
 type VersionMismatchError struct {
@@ -54,6 +54,9 @@ var MIGRATIONS = []string{
 	alter table tweets add column last_scraped_at integer not null default 0`,
 `update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
 	insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
+// v4: video thumbnail columns. Deliberately not UNIQUE: SQLite rejects ADD COLUMN with a UNIQUE constraint, and existing rows all share the default.
+`alter table videos add column thumbnail_remote_url text not null default 'missing';
+	alter table videos add column thumbnail_local_filename text not null default 'missing'`,
 }
 
 /**
diff --git a/scraper/video.go b/scraper/video.go
index 6fa379f..e34ff73 100644
--- a/scraper/video.go
+++ b/scraper/video.go
@@ -3,6 +3,7 @@ package scraper
 import (
 	"fmt"
 	"sort"
+	"path"
 )
 
 type VideoID int64
@@ -17,6 +18,10 @@ type Video struct {
 	Height int
 	RemoteURL string
 	LocalFilename string
+
+	ThumbnailRemoteUrl string
+	ThumbnailLocalPath string
+
 	IsDownloaded bool
 	IsGif bool
 }
@@ -34,6 +39,10 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
 		Height: apiVideo.OriginalInfo.Height,
 		RemoteURL: variants[0].URL,
 		LocalFilename: local_filename,
+
+		ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
+		ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
+
 		IsDownloaded: false,
 		IsGif: apiVideo.Type == "animated_gif",
 	}
diff --git a/scraper/video_test.go b/scraper/video_test.go
index 78fc724..0a2bc9e 100644
--- a/scraper/video_test.go
+++ b/scraper/video_test.go
@@ -28,10 +28,7 @@ func TestParseAPIVideo(t *testing.T) {
 	if video.TweetID != tweet_id {
 		t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
 	}
-	expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
-	if video.RemoteURL != expected_remote_url {
-		t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
-	}
+
 	expected_height := 1280
 	if video.Height != expected_height {
 		t.Errorf("Expected height %d, got %d", expected_height, video.Height)
@@ -40,11 +37,22 @@ func TestParseAPIVideo(t *testing.T) {
 	if video.Width != expected_width {
 		t.Errorf("Expected width %d, got %d", expected_width, video.Width)
 	}
-
+	expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
+	if video.RemoteURL != expected_remote_url {
+		t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
+	}
 	expected_local_filename := "28.mp4"
 	if video.LocalFilename != expected_local_filename {
 		t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
 	}
+	expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
+	if video.ThumbnailRemoteUrl != expected_thumbnail_url {
+		t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
+	}
+	expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
+	if video.ThumbnailLocalPath != expected_thumbnail_filename {
+		t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
+	}
 	if video.IsDownloaded {
 		t.Errorf("Expected it not to be downloaded, but it was")
 	}