Add video thumbnails
This commit is contained in:
parent
692d3de564
commit
fbbb6834de
@ -62,12 +62,14 @@ test $(sqlite3 twitter.db "select count(*) from videos") = "1"
|
||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "1"
|
||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
|
||||
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
|
||||
test $(find videos| wc -l) = "1"
|
||||
test $(find videos | wc -l) = "1"
|
||||
test $(find video_thumbnails | wc -l) = "1"
|
||||
tw download_tweet_content 1418971605674467340
|
||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
|
||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
|
||||
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
|
||||
test $(find videos | wc -l) = "2"
|
||||
test $(find video_thumbnails | wc -l) = "2"
|
||||
|
||||
# Try to double-download it
|
||||
tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340
|
||||
|
@ -61,14 +61,23 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
|
||||
|
||||
|
||||
/**
|
||||
* Downloads an Video, and if successful, marks it as downloaded in the DB
|
||||
* Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
|
||||
*/
|
||||
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
|
||||
// Download the video
|
||||
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
||||
err := downloader.Curl(v.RemoteURL, outfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Download the thumbnail
|
||||
outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
|
||||
err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
v.IsDownloaded = true
|
||||
return p.SaveVideo(*v)
|
||||
}
|
||||
|
@ -33,12 +33,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
|
||||
*/
|
||||
func (p Profile) SaveVideo(vid scraper.Video) error {
|
||||
_, err := p.DB.Exec(`
|
||||
insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
on conflict do update
|
||||
set is_downloaded=(is_downloaded or ?)
|
||||
`,
|
||||
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
|
||||
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif,
|
||||
vid.IsDownloaded,
|
||||
)
|
||||
return err
|
||||
@ -112,7 +112,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
|
||||
* Get the list of videos for a tweet
|
||||
*/
|
||||
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
||||
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
|
||||
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -123,7 +123,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
|
||||
}
|
||||
var vid scraper.Video
|
||||
for rows.Next() {
|
||||
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
|
||||
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
@ -64,6 +64,7 @@ func NewProfile(target_dir string) (Profile, error) {
|
||||
link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
|
||||
images_dir := path.Join(target_dir, "images")
|
||||
videos_dir := path.Join(target_dir, "videos")
|
||||
video_thumbnails_dir := path.Join(target_dir, "video_thumbnails")
|
||||
|
||||
// Create the directory
|
||||
fmt.Printf("Creating new profile: %s\n", target_dir)
|
||||
@ -131,6 +132,13 @@ func NewProfile(target_dir string) (Profile, error) {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `video_thumbnails`
|
||||
fmt.Printf("Creating............. %s/\n", video_thumbnails_dir)
|
||||
err = os.Mkdir(video_thumbnails_dir, os.FileMode(0755))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
return Profile{target_dir, UsersList{}, settings, db}, nil
|
||||
}
|
||||
|
||||
|
@ -83,8 +83,8 @@ func TestNewProfile(t *testing.T) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if len(contents) != 7 {
|
||||
t.Fatalf("Expected 7 contents, got %d instead", len(contents))
|
||||
if len(contents) != 8 {
|
||||
t.Fatalf("Expected 8 contents, got %d instead", len(contents))
|
||||
}
|
||||
|
||||
expected_files := []struct {
|
||||
@ -97,6 +97,7 @@ func TestNewProfile(t *testing.T) {
|
||||
{"settings.yaml", false},
|
||||
{"twitter.db", false},
|
||||
{"users.yaml", false},
|
||||
{"video_thumbnails", true},
|
||||
{"videos", true},
|
||||
}
|
||||
|
||||
|
@ -126,6 +126,8 @@ create table videos (rowid integer primary key,
|
||||
height integer not null,
|
||||
remote_url text not null unique,
|
||||
local_filename text not null unique,
|
||||
thumbnail_remote_url text not null unique default "missing",
|
||||
thumbnail_local_filename text not null unique default "missing",
|
||||
is_gif boolean default 0,
|
||||
is_downloaded boolean default 0,
|
||||
|
||||
|
@ -92,6 +92,8 @@ func create_video_from_id(id int) scraper.Video {
|
||||
Height: id * 5,
|
||||
RemoteURL: filename,
|
||||
LocalFilename: filename,
|
||||
ThumbnailRemoteUrl: filename,
|
||||
ThumbnailLocalPath: filename,
|
||||
IsDownloaded: false,
|
||||
IsGif: false,
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
|
||||
const ENGINE_DATABASE_VERSION = 3
|
||||
const ENGINE_DATABASE_VERSION = 4
|
||||
|
||||
|
||||
type VersionMismatchError struct {
|
||||
@ -54,6 +54,8 @@ var MIGRATIONS = []string{
|
||||
alter table tweets add column last_scraped_at integer not null default 0`,
|
||||
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
||||
insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
||||
`alter table videos add column thumbnail_remote_url text not null unique default "missing";
|
||||
alter table videos add column thumbnail_local_filename text not null unique default "missing"`,
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3,6 +3,7 @@ package scraper
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"path"
|
||||
)
|
||||
|
||||
type VideoID int64
|
||||
@ -17,6 +18,10 @@ type Video struct {
|
||||
Height int
|
||||
RemoteURL string
|
||||
LocalFilename string
|
||||
|
||||
ThumbnailRemoteUrl string
|
||||
ThumbnailLocalPath string
|
||||
|
||||
IsDownloaded bool
|
||||
IsGif bool
|
||||
}
|
||||
@ -34,6 +39,10 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||
Height: apiVideo.OriginalInfo.Height,
|
||||
RemoteURL: variants[0].URL,
|
||||
LocalFilename: local_filename,
|
||||
|
||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
||||
|
||||
IsDownloaded: false,
|
||||
IsGif: apiVideo.Type == "animated_gif",
|
||||
}
|
||||
|
@ -28,10 +28,7 @@ func TestParseAPIVideo(t *testing.T) {
|
||||
if video.TweetID != tweet_id {
|
||||
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
|
||||
}
|
||||
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
||||
if video.RemoteURL != expected_remote_url {
|
||||
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
|
||||
}
|
||||
|
||||
expected_height := 1280
|
||||
if video.Height != expected_height {
|
||||
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
|
||||
@ -40,11 +37,22 @@ func TestParseAPIVideo(t *testing.T) {
|
||||
if video.Width != expected_width {
|
||||
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
|
||||
}
|
||||
|
||||
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
||||
if video.RemoteURL != expected_remote_url {
|
||||
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
|
||||
}
|
||||
expected_local_filename := "28.mp4"
|
||||
if video.LocalFilename != expected_local_filename {
|
||||
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
|
||||
}
|
||||
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
|
||||
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
|
||||
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
|
||||
}
|
||||
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
|
||||
if video.ThumbnailLocalPath != expected_thumbnail_filename {
|
||||
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
|
||||
}
|
||||
if video.IsDownloaded {
|
||||
t.Errorf("Expected it not to be downloaded, but it was")
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user