Add video thumbnails
This commit is contained in:
parent
692d3de564
commit
fbbb6834de
@ -63,11 +63,13 @@ test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 14189716
|
|||||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
|
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
|
||||||
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
|
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
|
||||||
test $(find videos | wc -l) = "1"
|
test $(find videos | wc -l) = "1"
|
||||||
|
test $(find video_thumbnails | wc -l) = "1"
|
||||||
tw download_tweet_content 1418971605674467340
|
tw download_tweet_content 1418971605674467340
|
||||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
|
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
|
||||||
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
|
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
|
||||||
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
|
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
|
||||||
test $(find videos | wc -l) = "2"
|
test $(find videos | wc -l) = "2"
|
||||||
|
test $(find video_thumbnails | wc -l) = "2"
|
||||||
|
|
||||||
# Try to double-download it
|
# Try to double-download it
|
||||||
tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340
|
tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340
|
||||||
|
@ -61,14 +61,23 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Downloads an Video, and if successful, marks it as downloaded in the DB
|
* Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
|
||||||
*/
|
*/
|
||||||
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
|
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
|
||||||
|
// Download the video
|
||||||
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
||||||
err := downloader.Curl(v.RemoteURL, outfile)
|
err := downloader.Curl(v.RemoteURL, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Download the thumbnail
|
||||||
|
outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
|
||||||
|
err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
v.IsDownloaded = true
|
v.IsDownloaded = true
|
||||||
return p.SaveVideo(*v)
|
return p.SaveVideo(*v)
|
||||||
}
|
}
|
||||||
|
@ -33,12 +33,12 @@ func (p Profile) SaveImage(img scraper.Image) error {
|
|||||||
*/
|
*/
|
||||||
func (p Profile) SaveVideo(vid scraper.Video) error {
|
func (p Profile) SaveVideo(vid scraper.Video) error {
|
||||||
_, err := p.DB.Exec(`
|
_, err := p.DB.Exec(`
|
||||||
insert into videos (id, tweet_id, width, height, remote_url, local_filename, is_downloaded, is_gif)
|
insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif)
|
||||||
values (?, ?, ?, ?, ?, ?, ?, ?)
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
on conflict do update
|
on conflict do update
|
||||||
set is_downloaded=(is_downloaded or ?)
|
set is_downloaded=(is_downloaded or ?)
|
||||||
`,
|
`,
|
||||||
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.IsDownloaded, vid.IsGif,
|
vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif,
|
||||||
vid.IsDownloaded,
|
vid.IsDownloaded,
|
||||||
)
|
)
|
||||||
return err
|
return err
|
||||||
@ -112,7 +112,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
|
|||||||
* Get the list of videos for a tweet
|
* Get the list of videos for a tweet
|
||||||
*/
|
*/
|
||||||
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
||||||
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded, is_gif from videos where tweet_id=?")
|
stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -123,7 +123,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
|
|||||||
}
|
}
|
||||||
var vid scraper.Video
|
var vid scraper.Video
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.IsDownloaded, &vid.IsGif)
|
err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -64,6 +64,7 @@ func NewProfile(target_dir string) (Profile, error) {
|
|||||||
link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
|
link_thumbnails_dir := path.Join(target_dir, "link_preview_images")
|
||||||
images_dir := path.Join(target_dir, "images")
|
images_dir := path.Join(target_dir, "images")
|
||||||
videos_dir := path.Join(target_dir, "videos")
|
videos_dir := path.Join(target_dir, "videos")
|
||||||
|
video_thumbnails_dir := path.Join(target_dir, "video_thumbnails")
|
||||||
|
|
||||||
// Create the directory
|
// Create the directory
|
||||||
fmt.Printf("Creating new profile: %s\n", target_dir)
|
fmt.Printf("Creating new profile: %s\n", target_dir)
|
||||||
@ -131,6 +132,13 @@ func NewProfile(target_dir string) (Profile, error) {
|
|||||||
return Profile{}, err
|
return Profile{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create `video_thumbnails`
|
||||||
|
fmt.Printf("Creating............. %s/\n", video_thumbnails_dir)
|
||||||
|
err = os.Mkdir(video_thumbnails_dir, os.FileMode(0755))
|
||||||
|
if err != nil {
|
||||||
|
return Profile{}, err
|
||||||
|
}
|
||||||
|
|
||||||
return Profile{target_dir, UsersList{}, settings, db}, nil
|
return Profile{target_dir, UsersList{}, settings, db}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,8 +83,8 @@ func TestNewProfile(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
if len(contents) != 7 {
|
if len(contents) != 8 {
|
||||||
t.Fatalf("Expected 7 contents, got %d instead", len(contents))
|
t.Fatalf("Expected 8 contents, got %d instead", len(contents))
|
||||||
}
|
}
|
||||||
|
|
||||||
expected_files := []struct {
|
expected_files := []struct {
|
||||||
@ -97,6 +97,7 @@ func TestNewProfile(t *testing.T) {
|
|||||||
{"settings.yaml", false},
|
{"settings.yaml", false},
|
||||||
{"twitter.db", false},
|
{"twitter.db", false},
|
||||||
{"users.yaml", false},
|
{"users.yaml", false},
|
||||||
|
{"video_thumbnails", true},
|
||||||
{"videos", true},
|
{"videos", true},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,6 +126,8 @@ create table videos (rowid integer primary key,
|
|||||||
height integer not null,
|
height integer not null,
|
||||||
remote_url text not null unique,
|
remote_url text not null unique,
|
||||||
local_filename text not null unique,
|
local_filename text not null unique,
|
||||||
|
thumbnail_remote_url text not null unique default "missing",
|
||||||
|
thumbnail_local_filename text not null unique default "missing",
|
||||||
is_gif boolean default 0,
|
is_gif boolean default 0,
|
||||||
is_downloaded boolean default 0,
|
is_downloaded boolean default 0,
|
||||||
|
|
||||||
|
@ -92,6 +92,8 @@ func create_video_from_id(id int) scraper.Video {
|
|||||||
Height: id * 5,
|
Height: id * 5,
|
||||||
RemoteURL: filename,
|
RemoteURL: filename,
|
||||||
LocalFilename: filename,
|
LocalFilename: filename,
|
||||||
|
ThumbnailRemoteUrl: filename,
|
||||||
|
ThumbnailLocalPath: filename,
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
IsGif: false,
|
IsGif: false,
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
const ENGINE_DATABASE_VERSION = 3
|
const ENGINE_DATABASE_VERSION = 4
|
||||||
|
|
||||||
|
|
||||||
type VersionMismatchError struct {
|
type VersionMismatchError struct {
|
||||||
@ -54,6 +54,8 @@ var MIGRATIONS = []string{
|
|||||||
alter table tweets add column last_scraped_at integer not null default 0`,
|
alter table tweets add column last_scraped_at integer not null default 0`,
|
||||||
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
||||||
insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
||||||
|
`alter table videos add column thumbnail_remote_url text not null unique default "missing";
|
||||||
|
alter table videos add column thumbnail_local_filename text not null unique default "missing"`,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3,6 +3,7 @@ package scraper
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
|
"path"
|
||||||
)
|
)
|
||||||
|
|
||||||
type VideoID int64
|
type VideoID int64
|
||||||
@ -17,6 +18,10 @@ type Video struct {
|
|||||||
Height int
|
Height int
|
||||||
RemoteURL string
|
RemoteURL string
|
||||||
LocalFilename string
|
LocalFilename string
|
||||||
|
|
||||||
|
ThumbnailRemoteUrl string
|
||||||
|
ThumbnailLocalPath string
|
||||||
|
|
||||||
IsDownloaded bool
|
IsDownloaded bool
|
||||||
IsGif bool
|
IsGif bool
|
||||||
}
|
}
|
||||||
@ -34,6 +39,10 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
|||||||
Height: apiVideo.OriginalInfo.Height,
|
Height: apiVideo.OriginalInfo.Height,
|
||||||
RemoteURL: variants[0].URL,
|
RemoteURL: variants[0].URL,
|
||||||
LocalFilename: local_filename,
|
LocalFilename: local_filename,
|
||||||
|
|
||||||
|
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||||
|
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
||||||
|
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
IsGif: apiVideo.Type == "animated_gif",
|
IsGif: apiVideo.Type == "animated_gif",
|
||||||
}
|
}
|
||||||
|
@ -28,10 +28,7 @@ func TestParseAPIVideo(t *testing.T) {
|
|||||||
if video.TweetID != tweet_id {
|
if video.TweetID != tweet_id {
|
||||||
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
|
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
|
||||||
}
|
}
|
||||||
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
|
||||||
if video.RemoteURL != expected_remote_url {
|
|
||||||
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
|
|
||||||
}
|
|
||||||
expected_height := 1280
|
expected_height := 1280
|
||||||
if video.Height != expected_height {
|
if video.Height != expected_height {
|
||||||
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
|
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
|
||||||
@ -40,11 +37,22 @@ func TestParseAPIVideo(t *testing.T) {
|
|||||||
if video.Width != expected_width {
|
if video.Width != expected_width {
|
||||||
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
|
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
|
||||||
}
|
}
|
||||||
|
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
||||||
|
if video.RemoteURL != expected_remote_url {
|
||||||
|
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
|
||||||
|
}
|
||||||
expected_local_filename := "28.mp4"
|
expected_local_filename := "28.mp4"
|
||||||
if video.LocalFilename != expected_local_filename {
|
if video.LocalFilename != expected_local_filename {
|
||||||
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
|
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
|
||||||
}
|
}
|
||||||
|
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
|
||||||
|
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
|
||||||
|
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
|
||||||
|
}
|
||||||
|
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
|
||||||
|
if video.ThumbnailLocalPath != expected_thumbnail_filename {
|
||||||
|
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
|
||||||
|
}
|
||||||
if video.IsDownloaded {
|
if video.IsDownloaded {
|
||||||
t.Errorf("Expected it not to be downloaded, but it was")
|
t.Errorf("Expected it not to be downloaded, but it was")
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user