Use VideoID given by the Twitter API instead of a rowid

This commit is contained in:
Alessio 2021-08-04 23:41:58 -07:00
parent e06bb4dc9a
commit 58dc223f84
12 changed files with 86 additions and 63 deletions

View File

@ -1,7 +1,7 @@
package persistence package persistence
import ( import (
"database/sql" "fmt"
"offline_twitter/scraper" "offline_twitter/scraper"
) )
@ -26,22 +26,22 @@ func (p Profile) SaveImage(img scraper.Image) error {
} }
/** /**
* Save a Video. If it's a new Video (no rowid), does an insert; otherwise, does an update. * Save a Video
* *
* args: * args:
* - img: the Video to save * - img: the Video to save
*
* returns:
* - the rowid
*/ */
func (p Profile) SaveVideo(vid scraper.Video) (sql.Result, error) { func (p Profile) SaveVideo(vid scraper.Video) error {
if vid.ID == 0 { _, err := p.DB.Exec(`
// New image insert into videos (id, tweet_id, filename, is_downloaded)
return p.DB.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", vid.TweetID, vid.Filename) values (?, ?, ?, ?)
} else { on conflict do update
// Updating an existing image set is_downloaded=?
return p.DB.Exec("update videos set filename=?, is_downloaded=? where rowid=?", vid.Filename, vid.IsDownloaded, vid.ID) `,
} vid.ID, vid.TweetID, vid.Filename, vid.IsDownloaded,
vid.IsDownloaded,
)
return err
} }
/** /**
@ -75,7 +75,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet * Get the list of videos for a tweet
*/ */
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select rowid, filename, is_downloaded from videos where tweet_id=?") stmt, err := p.DB.Prepare("select id, filename, is_downloaded from videos where tweet_id=?")
if err != nil { if err != nil {
return return
} }

View File

@ -3,7 +3,6 @@ package persistence_test
import ( import (
"testing" "testing"
"math/rand" "math/rand"
"fmt"
"time" "time"
"github.com/go-test/deep" "github.com/go-test/deep"
@ -101,19 +100,14 @@ func TestSaveAndLoadVideo(t *testing.T) {
// Create a fresh Video to test on // Create a fresh Video to test on
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
filename := fmt.Sprint(rand.Int()) vid := create_video_from_id(rand.Int())
vid := scraper.Video{TweetID: tweet.ID, Filename: filename, IsDownloaded: false} vid.TweetID = tweet.ID
// Save the Video // Save the Video
result, err := profile.SaveVideo(vid) err := profile.SaveVideo(vid)
if err != nil { if err != nil {
t.Fatalf("Failed to save the video: %s", err.Error()) t.Fatalf("Failed to save the video: %s", err.Error())
} }
last_insert, err := result.LastInsertId()
if err != nil {
t.Fatalf("last insert??? %s", err.Error())
}
vid.ID = scraper.VideoID(last_insert)
// Reload the Video // Reload the Video
vids, err := profile.GetVideosForTweet(tweet) vids, err := profile.GetVideosForTweet(tweet)
@ -145,25 +139,17 @@ func TestModifyVideo(t *testing.T) {
tweet := create_stable_tweet() tweet := create_stable_tweet()
vid := tweet.Videos[0] vid := tweet.Videos[0]
if vid.ID != 1 { if vid.ID != -1 {
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, vid.ID) t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, vid.ID)
} }
vid.Filename = "local/sdfjk.jpg"
vid.IsDownloaded = true vid.IsDownloaded = true
// Save the changes // Save the changes
result, err := profile.SaveVideo(vid) err := profile.SaveVideo(vid)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
rows_affected, err := result.RowsAffected()
if err != nil {
t.Error(err)
}
if rows_affected != 1 {
t.Errorf("Expected 1 row changed, but got %d", rows_affected)
}
// Reload it // Reload it
vids, err := profile.GetVideosForTweet(tweet) vids, err := profile.GetVideosForTweet(tweet)
@ -172,7 +158,7 @@ func TestModifyVideo(t *testing.T) {
} }
new_vid := vids[0] new_vid := vids[0]
if new_vid.ID != vid.ID { if new_vid.ID != vid.ID {
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, new_vid.ID) t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, new_vid.ID)
} }
if diff := deep.Equal(vid, new_vid); diff != nil { if diff := deep.Equal(vid, new_vid); diff != nil {

View File

@ -68,6 +68,7 @@ create table images (rowid integer primary key,
); );
create table videos (rowid integer primary key, create table videos (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null, tweet_id integer not null,
filename text not null unique, filename text not null unique,
is_downloaded boolean default 0, is_downloaded boolean default 0,

View File

@ -45,7 +45,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
} }
} }
for _, video := range t.Videos { for _, video := range t.Videos {
_, err := p.SaveVideo(video) err := p.SaveVideo(video)
if err != nil { if err != nil {
return err return err
} }

View File

@ -28,10 +28,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
t.Fatalf("Failed to load the tweet: %s", err.Error()) t.Fatalf("Failed to load the tweet: %s", err.Error())
} }
for i := range tweet.Videos {
tweet.Videos[i].ID = new_tweet.Videos[i].ID
}
if diff := deep.Equal(tweet, new_tweet); diff != nil { if diff := deep.Equal(tweet, new_tweet); diff != nil {
t.Error(diff) t.Error(diff)
} }

View File

@ -85,7 +85,6 @@ func parse_user_from_row(row *sql.Row) (scraper.User, error) {
if err != nil { if err != nil {
return u, err return u, err
} }
u.JoinDate = time.Unix(joinDate, 0) u.JoinDate = time.Unix(joinDate, 0)
return u, nil return u, nil

View File

@ -59,7 +59,7 @@ func create_stable_user() scraper.User {
} }
/** /**
* Create a semi-stable image based on the given ID * Create a semi-stable Image based on the given ID
*/ */
func create_image_from_id(id int) scraper.Image { func create_image_from_id(id int) scraper.Image {
filename := fmt.Sprintf("image%d.jpg", id) filename := fmt.Sprintf("image%d.jpg", id)
@ -71,6 +71,19 @@ func create_image_from_id(id int) scraper.Image {
} }
} }
/**
 * Build a test Video whose ID and filename are both derived from `id`.
 *
 * args:
 * - id: becomes the VideoID and is embedded in the filename ("video<id>.jpg")
 *
 * returns:
 * - a Video with TweetID -1 that is not marked as downloaded; callers that
 *   attach it to a specific tweet overwrite TweetID afterwards
 */
func create_video_from_id(id int) scraper.Video {
	var vid scraper.Video
	vid.ID = scraper.VideoID(id)
	vid.TweetID = -1
	vid.Filename = fmt.Sprintf("video%d.jpg", id)
	vid.IsDownloaded = false
	return vid
}
/** /**
* Create a stable tweet with a fixed ID and content * Create a stable tweet with a fixed ID and content
*/ */
@ -85,7 +98,9 @@ func create_stable_tweet() scraper.Tweet {
NumRetweets: 10, NumRetweets: 10,
NumReplies: 10, NumReplies: 10,
NumQuoteTweets: 10, NumQuoteTweets: 10,
Videos: []scraper.Video{{ID: scraper.VideoID(1), TweetID: tweet_id, Filename: "asdf", IsDownloaded: false}}, Videos: []scraper.Video{
create_video_from_id(-1),
},
Urls: []string{}, Urls: []string{},
Images: []scraper.Image{ Images: []scraper.Image{
create_image_from_id(-1), create_image_from_id(-1),
@ -133,6 +148,8 @@ func create_dummy_tweet() scraper.Tweet {
img1.TweetID = tweet_id img1.TweetID = tweet_id
img2 := create_image_from_id(rand.Int()) img2 := create_image_from_id(rand.Int())
img2.TweetID = tweet_id img2.TweetID = tweet_id
vid := create_video_from_id(rand.Int())
vid.TweetID = tweet_id
return scraper.Tweet{ return scraper.Tweet{
ID: tweet_id, ID: tweet_id,
@ -143,7 +160,7 @@ func create_dummy_tweet() scraper.Tweet {
NumRetweets: 2, NumRetweets: 2,
NumReplies: 3, NumReplies: 3,
NumQuoteTweets: 4, NumQuoteTweets: 4,
Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video" + fmt.Sprint(tweet_id), IsDownloaded: false}}, Videos: []scraper.Video{vid},
Urls: []string{"url1", "url2"}, Urls: []string{"url1", "url2"},
Images: []scraper.Image{img1, img2}, Images: []scraper.Image{img1, img2},
Mentions: []scraper.UserHandle{"mention1", "mention2"}, Mentions: []scraper.UserHandle{"mention1", "mention2"},

View File

@ -7,6 +7,14 @@ import (
"strconv" "strconv"
) )
// APIMedia is a single media entry decoded from API JSON.
// ParseAPIMedia converts one of these into an Image.
type APIMedia struct {
// Read from the "id_str" key; the `,string` tag option makes encoding/json
// accept the number in its quoted (string) form.
ID int64 `json:"id_str,string"`
// Read from "media_url_https". ParseAPIMedia uses it as the remote URL and
// takes its base name as the local filename.
MediaURLHttps string `json:"media_url_https"`
// Read from the "type" key.
Type string `json:"type"`
// Read from the "url" key.
URL string `json:"url"`
}
type SortableVariants []struct { type SortableVariants []struct {
Bitrate int `json:"bitrate,omitempty"` Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"` URL string `json:"url"`
@ -15,11 +23,13 @@ func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] } func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate } func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
type APIMedia struct { type APIExtendedMedia struct {
ID int64 `json:"id_str,string"` ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"` MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"` Type string `json:"type"`
URL string `json:"url"` VideoInfo struct {
Variants SortableVariants `json:"variants"`
} `json:"video_info"`
} }
type APITweet struct { type APITweet struct {
@ -43,14 +53,7 @@ type APITweet struct {
} `json:"user_mentions"` } `json:"user_mentions"`
} `json:"entities"` } `json:"entities"`
ExtendedEntities struct { ExtendedEntities struct {
Media []struct { Media []APIExtendedMedia `json:"media"`
IDStr string `json:"id_str"`
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
VideoInfo struct {
Variants SortableVariants `json:"variants"`
} `json:"video_info"`
} `json:"media"`
} `json:"extended_entities"` } `json:"extended_entities"`
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"` InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"` InReplyToScreenName string `json:"in_reply_to_screen_name"`
@ -123,7 +126,7 @@ type APIUser struct {
ListedCount int `json:"listed_count"` ListedCount int `json:"listed_count"`
Name string `json:"name"` Name string `json:"name"`
Location string `json:"location"` Location string `json:"location"`
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
ProfileBannerURL string `json:"profile_banner_url"` ProfileBannerURL string `json:"profile_banner_url"`
ProfileImageURLHTTPS string `json:"profile_image_url_https"` ProfileImageURLHTTPS string `json:"profile_image_url_https"`
Protected bool `json:"protected"` Protected bool `json:"protected"`

View File

@ -32,6 +32,7 @@ func TestNormalizeContent(t *testing.T) {
var tweet scraper.APITweet var tweet scraper.APITweet
err = json.Unmarshal(data, &tweet) err = json.Unmarshal(data, &tweet)
if err != nil { if err != nil {
println("Failed at " + v.filename)
t.Errorf(err.Error()) t.Errorf(err.Error())
} }

View File

@ -19,7 +19,7 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := path.Base(apiMedia.MediaURLHttps) local_filename := path.Base(apiMedia.MediaURLHttps)
return Image{ return Image{
ID: ImageID(apiMedia.ID), ID: ImageID(apiMedia.ID),
Filename: apiMedia.MediaURLHttps, // XXX filename Filename: apiMedia.MediaURLHttps, // TODO filename
RemoteURL: apiMedia.MediaURLHttps, RemoteURL: apiMedia.MediaURLHttps,
LocalFilename: local_filename, LocalFilename: local_filename,
IsDownloaded: false, IsDownloaded: false,

View File

@ -3,7 +3,6 @@ package scraper
import ( import (
"time" "time"
"fmt" "fmt"
"sort"
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
) )
@ -116,9 +115,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
if len(apiTweet.ExtendedEntities.Media) != 1 { if len(apiTweet.ExtendedEntities.Media) != 1 {
panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media)) panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
} }
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants new_video := ParseAPIVideo(apiTweet.ExtendedEntities.Media[0], ret.ID)
sort.Sort(variants) ret.Videos = []Video{new_video}
ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
ret.Images = []Image{} ret.Images = []Image{}
} }
return return

View File

@ -2,17 +2,39 @@ package scraper
import ( import (
"fmt" "fmt"
"sort"
) )
type VideoID int type VideoID int64
// TODO video-source-user: extract source user information (e.g., someone shares a video
// from someone else).
type Video struct { type Video struct {
ID VideoID ID VideoID
TweetID TweetID TweetID TweetID
Filename string Filename string // TODO video-filename: delete when it all works
RemoteURL string
LocalFilename string
IsDownloaded bool IsDownloaded bool
} }
func (v Video) FilenameWhenDownloaded() string { func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
variants := apiVideo.VideoInfo.Variants
sort.Sort(variants)
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
return Video{
ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Filename: variants[0].URL,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
IsDownloaded: false,
}
}
func (v Video) FilenameWhenDownloaded() string { // TODO video-filename: delete whole method and associated test
return fmt.Sprintf("%d.mp4", v.TweetID) return fmt.Sprintf("%d.mp4", v.TweetID)
} }