From 28e139a3e45ced90dd7d1eb524dfb4439cbf20fd Mon Sep 17 00:00:00 2001 From: Alessio Date: Fri, 24 Dec 2021 16:26:34 -0500 Subject: [PATCH] Add video durations and view counts --- persistence/media_queries.go | 15 ++++++++------- persistence/media_queries_test.go | 3 ++- persistence/schema.sql | 2 ++ persistence/utils_test.go | 2 ++ persistence/versions.go | 4 +++- scraper/api_types.go | 6 ++++++ scraper/video.go | 24 ++++++++++++++++++++++++ scraper/video_test.go | 10 ++++++++++ 8 files changed, 57 insertions(+), 9 deletions(-) diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 7369235..50b35d3 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -33,13 +33,14 @@ func (p Profile) SaveImage(img scraper.Image) error { */ func (p Profile) SaveVideo(vid scraper.Video) error { _, err := p.DB.Exec(` - insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif) - values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration, view_count, is_downloaded, is_gif) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update - set is_downloaded=(is_downloaded or ?) + set is_downloaded=(is_downloaded or ?), + view_count=max(view_count, ?) 
`, - vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.IsDownloaded, vid.IsGif, - vid.IsDownloaded, + vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, vid.Duration, vid.ViewCount, vid.IsDownloaded, vid.IsGif, + vid.IsDownloaded, vid.ViewCount, ) return err } @@ -112,7 +113,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e * Get the list of videos for a tweet */ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { - stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, is_downloaded, is_gif from videos where tweet_id=?") + stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration, view_count, is_downloaded, is_gif from videos where tweet_id=?") if err != nil { return } @@ -123,7 +124,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e } var vid scraper.Video for rows.Next() { - err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.IsDownloaded, &vid.IsGif) + err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, &vid.ThumbnailLocalPath, &vid.Duration, &vid.ViewCount, &vid.IsDownloaded, &vid.IsGif) if err != nil { return } diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go index 5b28177..4b8773a 100644 --- a/persistence/media_queries_test.go +++ b/persistence/media_queries_test.go @@ -131,7 +131,7 @@ func TestSaveAndLoadVideo(t *testing.T) { } /** - * Change an Image, save the changes, reload it, and check if it comes back the same + * Change a Video, save the changes, reload it, and 
check if it comes back the same */ func TestModifyVideo(t *testing.T) { profile_path := "test_profiles/TestMediaQueries" @@ -145,6 +145,7 @@ func TestModifyVideo(t *testing.T) { } vid.IsDownloaded = true + vid.ViewCount = 23000 // Save the changes err := profile.SaveVideo(vid) diff --git a/persistence/schema.sql b/persistence/schema.sql index ee68910..69cf178 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -128,6 +128,8 @@ create table videos (rowid integer primary key, local_filename text not null unique, thumbnail_remote_url text not null default "missing", thumbnail_local_filename text not null default "missing", + duration integer not null default 0, + view_count integer not null default 0, is_gif boolean default 0, is_downloaded boolean default 0, diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 3dcce0f..36d9af2 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -94,6 +94,8 @@ func create_video_from_id(id int) scraper.Video { LocalFilename: filename, ThumbnailRemoteUrl: filename, ThumbnailLocalPath: filename, + Duration: 10000, + ViewCount: 200, IsDownloaded: false, IsGif: false, } diff --git a/persistence/versions.go b/persistence/versions.go index b9d66e0..5fa7e7a 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -8,7 +8,7 @@ import ( ) -const ENGINE_DATABASE_VERSION = 4 +const ENGINE_DATABASE_VERSION = 5 type VersionMismatchError struct { @@ -56,6 +56,8 @@ var MIGRATIONS = []string{ insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, `alter table videos add column thumbnail_remote_url text not null default "missing"; alter table videos add column thumbnail_local_filename text not null default "missing"`, +`alter table videos add column duration integer not null default 0; + alter table videos add column view_count 
integer not null default 0`, } /** diff --git a/scraper/api_types.go b/scraper/api_types.go index a15ffe5..903898f 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -35,11 +35,17 @@ type APIExtendedMedia struct { Type string `json:"type"` VideoInfo struct { Variants SortableVariants `json:"variants"` + Duration int `json:"duration_millis"` } `json:"video_info"` OriginalInfo struct { Width int `json:"width"` Height int `json:"height"` } `json:"original_info"` + Ext struct { + MediaStats struct { + R interface{} `json:"r"` + } `json:"mediaStats"` + } `json:"ext"` } type APICard struct { diff --git a/scraper/video.go b/scraper/video.go index e34ff73..e20399a 100644 --- a/scraper/video.go +++ b/scraper/video.go @@ -4,6 +4,7 @@ import ( "fmt" "sort" "path" + "reflect" // NOTE(review): not referenced by any line this patch adds; Go rejects unused imports, confirm before merging ) type VideoID int64 @@ -21,6 +22,8 @@ type Video struct { ThumbnailRemoteUrl string ThumbnailLocalPath string + Duration int // milliseconds + ViewCount int IsDownloaded bool IsGif bool @@ -30,6 +33,25 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { variants := apiVideo.VideoInfo.Variants sort.Sort(variants) + var view_count int + + r := apiVideo.Ext.MediaStats.R + + switch r.(type) { + case string: + view_count = 0 + case map[string]interface{}: + OK_entry, ok := r.(map[string]interface{})["ok"] + if !ok { + panic("No 'ok' value found in the R!") + } + view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"] + if !ok { // check the key exists before asserting its type, so a missing key yields this message + panic("No 'viewCount' value found in the OK!") + } + view_count = int_or_panic(view_count_str.(string)) + } + local_filename := fmt.Sprintf("%d.mp4", tweet_id) return Video{ @@ -42,6 +64,8 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { ThumbnailRemoteUrl: apiVideo.MediaURLHttps, ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps), + Duration: apiVideo.VideoInfo.Duration, + ViewCount: view_count, IsDownloaded: false, IsGif: apiVideo.Type == "animated_gif", diff --git a/scraper/video_test.go 
b/scraper/video_test.go index 0a2bc9e..dc261a6 100644 --- a/scraper/video_test.go +++ b/scraper/video_test.go @@ -53,6 +53,16 @@ func TestParseAPIVideo(t *testing.T) { if video.ThumbnailLocalPath != expected_thumbnail_filename { t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath) } + expected_view_count := 275952 + if video.ViewCount != expected_view_count { + t.Errorf("Expected view count %d, got %d", expected_view_count, video.ViewCount) + } + expected_duration := 88300 + if video.Duration != expected_duration { + t.Errorf("Expected duration %d, got %d", expected_duration, video.Duration) + } + + if video.IsDownloaded { t.Errorf("Expected it not to be downloaded, but it was") }