diff --git a/persistence/media_queries.go b/persistence/media_queries.go
index ec64f5a..da4fc76 100644
--- a/persistence/media_queries.go
+++ b/persistence/media_queries.go
@@ -1,7 +1,7 @@
 package persistence
 
 import (
-	"database/sql"
+	"fmt"
 
 	"offline_twitter/scraper"
 )
@@ -26,22 +26,22 @@ func (p Profile) SaveImage(img scraper.Image) error {
 }
 
 /**
- * Save a Video. If it's a new Video (no rowid), does an insert; otherwise, does an update.
+ * Save a Video
  *
  * args:
 * - img: the Video to save
- *
- * returns:
- * - the rowid
  */
-func (p Profile) SaveVideo(vid scraper.Video) (sql.Result, error) {
-	if vid.ID == 0 {
-		// New image
-		return p.DB.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", vid.TweetID, vid.Filename)
-	} else {
-		// Updating an existing image
-		return p.DB.Exec("update videos set filename=?, is_downloaded=? where rowid=?", vid.Filename, vid.IsDownloaded, vid.ID)
-	}
+func (p Profile) SaveVideo(vid scraper.Video) error {
+	_, err := p.DB.Exec(`
+		insert into videos (id, tweet_id, filename, is_downloaded)
+		values (?, ?, ?, ?)
+		on conflict do update
+		set is_downloaded=?
+		`,
+		vid.ID, vid.TweetID, vid.Filename, vid.IsDownloaded,
+		vid.IsDownloaded,
+	)
+	return err
 }
 
 /**
@@ -75,7 +75,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
  * Get the list of videos for a tweet
  */
 func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
-	stmt, err := p.DB.Prepare("select rowid, filename, is_downloaded from videos where tweet_id=?")
+	stmt, err := p.DB.Prepare("select id, filename, is_downloaded from videos where tweet_id=?")
 	if err != nil {
 		return
 	}
diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go
index 8d3a7cc..49b6665 100644
--- a/persistence/media_queries_test.go
+++ b/persistence/media_queries_test.go
@@ -3,7 +3,6 @@ package persistence_test
 import (
 	"testing"
 	"math/rand"
-	"fmt"
 	"time"
 
 	"github.com/go-test/deep"
@@ -101,19 +100,14 @@ func TestSaveAndLoadVideo(t *testing.T) {
 
 	// Create a fresh Video to test on
 	rand.Seed(time.Now().UnixNano())
-	filename := fmt.Sprint(rand.Int())
-	vid := scraper.Video{TweetID: tweet.ID, Filename: filename, IsDownloaded: false}
+	vid := create_video_from_id(rand.Int())
+	vid.TweetID = tweet.ID
 
 	// Save the Video
-	result, err := profile.SaveVideo(vid)
+	err := profile.SaveVideo(vid)
 	if err != nil {
 		t.Fatalf("Failed to save the video: %s", err.Error())
 	}
-	last_insert, err := result.LastInsertId()
-	if err != nil {
-		t.Fatalf("last insert??? %s", err.Error())
-	}
-	vid.ID = scraper.VideoID(last_insert)
 
 	// Reload the Video
 	vids, err := profile.GetVideosForTweet(tweet)
@@ -145,25 +139,17 @@ func TestModifyVideo(t *testing.T) {
 	tweet := create_stable_tweet()
 	vid := tweet.Videos[0]
 
-	if vid.ID != 1 {
-		t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, vid.ID)
+	if vid.ID != -1 {
+		t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, vid.ID)
 	}
 
-	vid.Filename = "local/sdfjk.jpg"
 	vid.IsDownloaded = true
 
 	// Save the changes
-	result, err := profile.SaveVideo(vid)
+	err := profile.SaveVideo(vid)
 	if err != nil {
 		t.Error(err)
 	}
-	rows_affected, err := result.RowsAffected()
-	if err != nil {
-		t.Error(err)
-	}
-	if rows_affected != 1 {
-		t.Errorf("Expected 1 row changed, but got %d", rows_affected)
-	}
 
 	// Reload it
 	vids, err := profile.GetVideosForTweet(tweet)
@@ -172,7 +158,7 @@
 	}
 	new_vid := vids[0]
 	if new_vid.ID != vid.ID {
-		t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, new_vid.ID)
+		t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, new_vid.ID)
 	}
 
 	if diff := deep.Equal(vid, new_vid); diff != nil {
diff --git a/persistence/schema.sql b/persistence/schema.sql
index 13f9ada..f65f9bc 100644
--- a/persistence/schema.sql
+++ b/persistence/schema.sql
@@ -68,6 +68,7 @@ create table images (rowid integer primary key,
 );
 
 create table videos (rowid integer primary key,
+    id integer unique not null check(typeof(id) = 'integer'),
     tweet_id integer not null,
     filename text not null unique,
     is_downloaded boolean default 0,
diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go
index 0f68e60..0e9d28f 100644
--- a/persistence/tweet_queries.go
+++ b/persistence/tweet_queries.go
@@ -45,7 +45,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 		}
 	}
 	for _, video := range t.Videos {
-		_, err := p.SaveVideo(video)
+		err := p.SaveVideo(video)
 		if err != nil {
 			return err
 		}
diff --git a/persistence/tweet_queries_test.go b/persistence/tweet_queries_test.go
index 0c13b2f..878452b 100644
--- a/persistence/tweet_queries_test.go
+++ b/persistence/tweet_queries_test.go
@@ -28,10 +28,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
 		t.Fatalf("Failed to load the tweet: %s", err.Error())
 	}
 
-	for i := range tweet.Videos {
-		tweet.Videos[i].ID = new_tweet.Videos[i].ID
-	}
-
 	if diff := deep.Equal(tweet, new_tweet); diff != nil {
 		t.Error(diff)
 	}
diff --git a/persistence/user_queries.go b/persistence/user_queries.go
index 262fd47..ac3e7b7 100644
--- a/persistence/user_queries.go
+++ b/persistence/user_queries.go
@@ -85,7 +85,6 @@ func parse_user_from_row(row *sql.Row) (scraper.User, error) {
 	if err != nil {
 		return u, err
 	}
-
 	u.JoinDate = time.Unix(joinDate, 0)
 
 	return u, nil
diff --git a/persistence/utils_test.go b/persistence/utils_test.go
index 04a25dd..aee6fa4 100644
--- a/persistence/utils_test.go
+++ b/persistence/utils_test.go
@@ -59,7 +59,7 @@ func create_stable_user() scraper.User {
 }
 
 /**
- * Create a semi-stable image based on the given ID
+ * Create a semi-stable Image based on the given ID
  */
 func create_image_from_id(id int) scraper.Image {
 	filename := fmt.Sprintf("image%d.jpg", id)
@@ -71,6 +71,19 @@ func create_image_from_id(id int) scraper.Image {
 	}
 }
 
+/**
+ * Create a semi-stable Video based on the given ID
+ */
+func create_video_from_id(id int) scraper.Video {
+	filename := fmt.Sprintf("video%d.jpg", id)
+	return scraper.Video{
+		ID: scraper.VideoID(id),
+		TweetID: -1,
+		Filename: filename,
+		IsDownloaded: false,
+	}
+}
+
 /**
 * Create a stable tweet with a fixed ID and content
 */
@@ -85,7 +98,9 @@ func create_stable_tweet() scraper.Tweet {
 		NumRetweets: 10,
 		NumReplies: 10,
 		NumQuoteTweets: 10,
-		Videos: []scraper.Video{{ID: scraper.VideoID(1), TweetID: tweet_id, Filename: "asdf", IsDownloaded: false}},
+		Videos: []scraper.Video{
+			create_video_from_id(-1),
+		},
 		Urls: []string{},
 		Images: []scraper.Image{
 			create_image_from_id(-1),
@@ -133,6 +148,8 @@ func create_dummy_tweet() scraper.Tweet {
 	img1.TweetID = tweet_id
 	img2 := create_image_from_id(rand.Int())
 	img2.TweetID = tweet_id
+	vid := create_video_from_id(rand.Int())
+	vid.TweetID = tweet_id
 
 	return scraper.Tweet{
 		ID: tweet_id,
@@ -143,7 +160,7 @@ func create_dummy_tweet() scraper.Tweet {
 		NumRetweets: 2,
 		NumReplies: 3,
 		NumQuoteTweets: 4,
-		Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video" + fmt.Sprint(tweet_id), IsDownloaded: false}},
+		Videos: []scraper.Video{vid},
 		Urls: []string{"url1", "url2"},
 		Images: []scraper.Image{img1, img2},
 		Mentions: []scraper.UserHandle{"mention1", "mention2"},
diff --git a/scraper/api_types.go b/scraper/api_types.go
index e41b93c..f8b73b0 100644
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -7,6 +7,14 @@ import (
 	"strconv"
 )
 
+
+type APIMedia struct {
+	ID int64 `json:"id_str,string"`
+	MediaURLHttps string `json:"media_url_https"`
+	Type string `json:"type"`
+	URL string `json:"url"`
+}
+
 type SortableVariants []struct {
 	Bitrate int `json:"bitrate,omitempty"`
 	URL string `json:"url"`
@@ -13,13 +21,15 @@
 }
 func (v SortableVariants) Len() int { return len(v) }
 func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
 func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
 
-type APIMedia struct {
+type APIExtendedMedia struct {
 	ID int64 `json:"id_str,string"`
 	MediaURLHttps string `json:"media_url_https"`
 	Type string `json:"type"`
-	URL string `json:"url"`
+	VideoInfo struct {
+		Variants SortableVariants `json:"variants"`
+	} `json:"video_info"`
 }
 
 type APITweet struct {
@@ -43,14 +53,7 @@ type APITweet struct {
 		} `json:"user_mentions"`
 	} `json:"entities"`
 	ExtendedEntities struct {
-		Media []struct {
-			IDStr string `json:"id_str"`
-			MediaURLHttps string `json:"media_url_https"`
-			Type string `json:"type"`
-			VideoInfo struct {
-				Variants SortableVariants `json:"variants"`
-			} `json:"video_info"`
-		} `json:"media"`
+		Media []APIExtendedMedia `json:"media"`
 	} `json:"extended_entities"`
 	InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
 	InReplyToScreenName string `json:"in_reply_to_screen_name"`
@@ -123,7 +126,7 @@
 	ListedCount int `json:"listed_count"`
 	Name string `json:"name"`
 	Location string `json:"location"`
-	PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
+	PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
 	ProfileBannerURL string `json:"profile_banner_url"`
 	ProfileImageURLHTTPS string `json:"profile_image_url_https"`
 	Protected bool `json:"protected"`
diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go
index 8823dec..f4c3662 100644
--- a/scraper/api_types_test.go
+++ b/scraper/api_types_test.go
@@ -32,6 +32,7 @@ func TestNormalizeContent(t *testing.T) {
 		var tweet scraper.APITweet
 		err = json.Unmarshal(data, &tweet)
 		if err != nil {
+			println("Failed at " + v.filename)
 			t.Errorf(err.Error())
 		}
 
diff --git a/scraper/image.go b/scraper/image.go
index 9b28e31..55c8426 100644
--- a/scraper/image.go
+++ b/scraper/image.go
@@ -19,7 +19,7 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
 	local_filename := path.Base(apiMedia.MediaURLHttps)
 	return Image{
 		ID: ImageID(apiMedia.ID),
-		Filename: apiMedia.MediaURLHttps, // XXX filename
+		Filename: apiMedia.MediaURLHttps, // TODO filename
 		RemoteURL: apiMedia.MediaURLHttps,
 		LocalFilename: local_filename,
 		IsDownloaded: false,
diff --git a/scraper/tweet.go b/scraper/tweet.go
index ecfac39..26a38d8 100644
--- a/scraper/tweet.go
+++ b/scraper/tweet.go
@@ -3,7 +3,6 @@ package scraper
 import (
 	"time"
 	"fmt"
-	"sort"
 
 	"offline_twitter/terminal_utils"
 )
@@ -116,9 +115,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 		if len(apiTweet.ExtendedEntities.Media) != 1 {
 			panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
 		}
-		variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
-		sort.Sort(variants)
-		ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
+		new_video := ParseAPIVideo(apiTweet.ExtendedEntities.Media[0], ret.ID)
+		ret.Videos = []Video{new_video}
 		ret.Images = []Image{}
 	}
 	return
diff --git a/scraper/video.go b/scraper/video.go
index 406e9e1..725306f 100644
--- a/scraper/video.go
+++ b/scraper/video.go
@@ -2,17 +2,39 @@ package scraper
 
 import (
 	"fmt"
+	"sort"
 )
 
-type VideoID int
+type VideoID int64
+
+// TODO video-source-user: extract source user information (e.g., someone shares a video
+// from someone else).
 
 type Video struct {
 	ID VideoID
 	TweetID TweetID
-	Filename string
+	Filename string // TODO video-filename: delete when it all works
+	RemoteURL string
+	LocalFilename string
 	IsDownloaded bool
 }
 
-func (v Video) FilenameWhenDownloaded() string {
+func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
+	variants := apiVideo.VideoInfo.Variants
+	sort.Sort(variants)
+
+	local_filename := fmt.Sprintf("%d.mp4", tweet_id)
+
+	return Video{
+		ID: VideoID(apiVideo.ID),
+		TweetID: tweet_id,
+		Filename: variants[0].URL,
+		RemoteURL: variants[0].URL,
+		LocalFilename: local_filename,
+		IsDownloaded: false,
+	}
+}
+
+func (v Video) FilenameWhenDownloaded() string { // TODO video-filename: delete whole method and associated test
 	return fmt.Sprintf("%d.mp4", v.TweetID)
 }
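
Editor's note, not part of the patch: a minimal usage sketch of the new upsert-style SaveVideo, assuming the persistence package lives at "offline_twitter/persistence" (import path not shown in the diff) and using the Profile and scraper.Video types as changed above. The helper name below is hypothetical, for illustration only.

    package main

    import (
        "offline_twitter/persistence" // assumed import path
        "offline_twitter/scraper"
    )

    // saveAndMarkDownloaded is a hypothetical helper. The first SaveVideo call
    // inserts a row keyed on the video's own ID; after the file has been fetched,
    // the second call hits the "on conflict do update" branch and only refreshes
    // is_downloaded, so re-saving the same Video is harmless.
    func saveAndMarkDownloaded(profile persistence.Profile, vid scraper.Video) error {
        if err := profile.SaveVideo(vid); err != nil {
            return err
        }
        // ... download vid.RemoteURL to vid.LocalFilename here ...
        vid.IsDownloaded = true
        return profile.SaveVideo(vid)
    }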