Use VideoID given by the Twitter API instead of a rowid

This commit is contained in:
Alessio 2021-08-04 23:41:58 -07:00
parent e06bb4dc9a
commit 58dc223f84
12 changed files with 86 additions and 63 deletions
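
In caller terms, the change is that SaveVideo no longer returns an sql.Result whose LastInsertId has to be copied back into the Video; the struct already carries the ID the Twitter API assigned. A rough sketch of the new call shape, not part of the diff below (the persistence import path and the wrapper function are assumed):

// Before this commit, call sites looked roughly like:
//
//     result, err := profile.SaveVideo(vid)
//     last_insert, _ := result.LastInsertId()
//     vid.ID = scraper.VideoID(last_insert)
//
// Now vid.ID is already the API-assigned value, so saving is a single call:
func saveVideo(profile persistence.Profile, vid scraper.Video) error {
    return profile.SaveVideo(vid)
}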

View File

@ -1,7 +1,7 @@
package persistence
import (
"database/sql"
"fmt"
"offline_twitter/scraper"
)
@ -26,22 +26,22 @@ func (p Profile) SaveImage(img scraper.Image) error {
}
/**
* Save a Video. If it's a new Video (no rowid), does an insert; otherwise, does an update.
* Save a Video
*
* args:
* - vid: the Video to save
*
* returns:
* - any error encountered while saving
*/
func (p Profile) SaveVideo(vid scraper.Video) (sql.Result, error) {
if vid.ID == 0 {
// New image
return p.DB.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", vid.TweetID, vid.Filename)
} else {
// Updating an existing image
return p.DB.Exec("update videos set filename=?, is_downloaded=? where rowid=?", vid.Filename, vid.IsDownloaded, vid.ID)
}
func (p Profile) SaveVideo(vid scraper.Video) error {
_, err := p.DB.Exec(`
insert into videos (id, tweet_id, filename, is_downloaded)
values (?, ?, ?, ?)
on conflict do update
set is_downloaded=?
`,
vid.ID, vid.TweetID, vid.Filename, vid.IsDownloaded,
vid.IsDownloaded,
)
return err
}
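
With the API-supplied id in place, SaveVideo behaves as an upsert: re-saving a Video whose id (or filename) already exists only refreshes is_downloaded. A minimal sketch of that behavior, assuming it lives in the persistence package next to SaveVideo and that the tweet was already saved; the literal values are made up:

func saveVideoTwiceExample(profile Profile, tweet scraper.Tweet) error {
    vid := scraper.Video{
        ID:       scraper.VideoID(1453), // hypothetical API-assigned ID
        TweetID:  tweet.ID,
        Filename: "https://video.twimg.com/ext_tw_video/1453/vid.mp4", // hypothetical URL
    }
    if err := profile.SaveVideo(vid); err != nil { // first call: plain insert
        return err
    }
    vid.IsDownloaded = true
    return profile.SaveVideo(vid) // second call: hits "on conflict do update"
}
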
/**
@ -75,7 +75,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
* Get the list of videos for a tweet
*/
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
stmt, err := p.DB.Prepare("select rowid, filename, is_downloaded from videos where tweet_id=?")
stmt, err := p.DB.Prepare("select id, filename, is_downloaded from videos where tweet_id=?")
if err != nil {
return
}

View File

@ -3,7 +3,6 @@ package persistence_test
import (
"testing"
"math/rand"
"fmt"
"time"
"github.com/go-test/deep"
@ -101,19 +100,14 @@ func TestSaveAndLoadVideo(t *testing.T) {
// Create a fresh Video to test on
rand.Seed(time.Now().UnixNano())
filename := fmt.Sprint(rand.Int())
vid := scraper.Video{TweetID: tweet.ID, Filename: filename, IsDownloaded: false}
vid := create_video_from_id(rand.Int())
vid.TweetID = tweet.ID
// Save the Video
result, err := profile.SaveVideo(vid)
err := profile.SaveVideo(vid)
if err != nil {
t.Fatalf("Failed to save the video: %s", err.Error())
}
last_insert, err := result.LastInsertId()
if err != nil {
t.Fatalf("last insert??? %s", err.Error())
}
vid.ID = scraper.VideoID(last_insert)
// Reload the Video
vids, err := profile.GetVideosForTweet(tweet)
@ -145,25 +139,17 @@ func TestModifyVideo(t *testing.T) {
tweet := create_stable_tweet()
vid := tweet.Videos[0]
if vid.ID != 1 {
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, vid.ID)
if vid.ID != -1 {
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, vid.ID)
}
vid.Filename = "local/sdfjk.jpg"
vid.IsDownloaded = true
// Save the changes
result, err := profile.SaveVideo(vid)
err := profile.SaveVideo(vid)
if err != nil {
t.Error(err)
}
rows_affected, err := result.RowsAffected()
if err != nil {
t.Error(err)
}
if rows_affected != 1 {
t.Errorf("Expected 1 row changed, but got %d", rows_affected)
}
// Reload it
vids, err := profile.GetVideosForTweet(tweet)
@ -172,7 +158,7 @@ func TestModifyVideo(t *testing.T) {
}
new_vid := vids[0]
if new_vid.ID != vid.ID {
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, new_vid.ID)
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, new_vid.ID)
}
if diff := deep.Equal(vid, new_vid); diff != nil {

View File

@ -68,6 +68,7 @@ create table images (rowid integer primary key,
);
create table videos (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,
filename text not null unique,
is_downloaded boolean default 0,
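
The new id column stores the API's numeric identifier directly, and the typeof() check exists because SQLite's flexible typing would otherwise accept a non-integer value in an integer column. A minimal sketch of what it rejects, assuming a *sql.DB (database/sql) opened with a SQLite driver such as github.com/mattn/go-sqlite3 against this schema as shown:

func insertBogusVideoID(db *sql.DB) error {
    _, err := db.Exec(
        "insert into videos (id, tweet_id, filename) values (?, ?, ?)",
        "not-a-number", // violates check(typeof(id) = 'integer')
        12345,
        "video12345.mp4",
    )
    return err // expected: a CHECK constraint failure instead of a silently stored text ID
}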

View File

@ -45,7 +45,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
}
}
for _, video := range t.Videos {
_, err := p.SaveVideo(video)
err := p.SaveVideo(video)
if err != nil {
return err
}

View File

@ -28,10 +28,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
t.Fatalf("Failed to load the tweet: %s", err.Error())
}
for i := range tweet.Videos {
tweet.Videos[i].ID = new_tweet.Videos[i].ID
}
if diff := deep.Equal(tweet, new_tweet); diff != nil {
t.Error(diff)
}

View File

@ -85,7 +85,6 @@ func parse_user_from_row(row *sql.Row) (scraper.User, error) {
if err != nil {
return u, err
}
u.JoinDate = time.Unix(joinDate, 0)
return u, nil

View File

@ -59,7 +59,7 @@ func create_stable_user() scraper.User {
}
/**
* Create a semi-stable image based on the given ID
* Create a semi-stable Image based on the given ID
*/
func create_image_from_id(id int) scraper.Image {
filename := fmt.Sprintf("image%d.jpg", id)
@ -71,6 +71,19 @@ func create_image_from_id(id int) scraper.Image {
}
}
/**
* Create a semi-stable Video based on the given ID
*/
func create_video_from_id(id int) scraper.Video {
filename := fmt.Sprintf("video%d.mp4", id)
return scraper.Video{
ID: scraper.VideoID(id),
TweetID: -1,
Filename: filename,
IsDownloaded: false,
}
}
/**
* Create a stable tweet with a fixed ID and content
*/
@ -85,7 +98,9 @@ func create_stable_tweet() scraper.Tweet {
NumRetweets: 10,
NumReplies: 10,
NumQuoteTweets: 10,
Videos: []scraper.Video{{ID: scraper.VideoID(1), TweetID: tweet_id, Filename: "asdf", IsDownloaded: false}},
Videos: []scraper.Video{
create_video_from_id(-1),
},
Urls: []string{},
Images: []scraper.Image{
create_image_from_id(-1),
@ -133,6 +148,8 @@ func create_dummy_tweet() scraper.Tweet {
img1.TweetID = tweet_id
img2 := create_image_from_id(rand.Int())
img2.TweetID = tweet_id
vid := create_video_from_id(rand.Int())
vid.TweetID = tweet_id
return scraper.Tweet{
ID: tweet_id,
@ -143,7 +160,7 @@ func create_dummy_tweet() scraper.Tweet {
NumRetweets: 2,
NumReplies: 3,
NumQuoteTweets: 4,
Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video" + fmt.Sprint(tweet_id), IsDownloaded: false}},
Videos: []scraper.Video{vid},
Urls: []string{"url1", "url2"},
Images: []scraper.Image{img1, img2},
Mentions: []scraper.UserHandle{"mention1", "mention2"},

View File

@ -7,6 +7,14 @@ import (
"strconv"
)
type APIMedia struct {
ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
URL string `json:"url"`
}
type SortableVariants []struct {
Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"`
@ -15,11 +23,13 @@ func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
type APIMedia struct {
type APIExtendedMedia struct {
ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
URL string `json:"url"`
VideoInfo struct {
Variants SortableVariants `json:"variants"`
} `json:"video_info"`
}
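
The ",string" option on id_str is what lets the int64 ID come straight from the API payload, since Twitter sends IDs as decimal strings to avoid precision loss in JavaScript clients. A small decoding sketch, not part of the commit; it assumes encoding/json is imported in this package, and all literal values are made up:

func decodeExtendedMediaExample() (APIExtendedMedia, error) {
    blob := []byte(`{
        "id_str": "1453461248142495744",
        "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/1453/pu/img/thumb.jpg",
        "type": "video",
        "video_info": {"variants": [
            {"bitrate": 632000, "url": "https://video.twimg.com/low.mp4"},
            {"bitrate": 2176000, "url": "https://video.twimg.com/high.mp4"}
        ]}
    }`)
    var media APIExtendedMedia
    err := json.Unmarshal(blob, &media) // media.ID == 1453461248142495744 as an int64
    return media, err
}
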
type APITweet struct {
@ -43,14 +53,7 @@ type APITweet struct {
} `json:"user_mentions"`
} `json:"entities"`
ExtendedEntities struct {
Media []struct {
IDStr string `json:"id_str"`
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
VideoInfo struct {
Variants SortableVariants `json:"variants"`
} `json:"video_info"`
} `json:"media"`
Media []APIExtendedMedia `json:"media"`
} `json:"extended_entities"`
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
@ -123,7 +126,7 @@ type APIUser struct {
ListedCount int `json:"listed_count"`
Name string `json:"name"`
Location string `json:"location"`
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // TODO: convert to int64s; the ",string" JSON option doesn't apply to slices
ProfileBannerURL string `json:"profile_banner_url"`
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
Protected bool `json:"protected"`

View File

@ -32,6 +32,7 @@ func TestNormalizeContent(t *testing.T) {
var tweet scraper.APITweet
err = json.Unmarshal(data, &tweet)
if err != nil {
t.Logf("Failed at %s", v.filename)
t.Errorf(err.Error())
}

View File

@ -19,7 +19,7 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := path.Base(apiMedia.MediaURLHttps)
return Image{
ID: ImageID(apiMedia.ID),
Filename: apiMedia.MediaURLHttps, // XXX filename
Filename: apiMedia.MediaURLHttps, // TODO filename
RemoteURL: apiMedia.MediaURLHttps,
LocalFilename: local_filename,
IsDownloaded: false,

View File

@ -3,7 +3,6 @@ package scraper
import (
"time"
"fmt"
"sort"
"offline_twitter/terminal_utils"
)
@ -116,9 +115,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
if len(apiTweet.ExtendedEntities.Media) != 1 {
panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
}
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
sort.Sort(variants)
ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
new_video := ParseAPIVideo(apiTweet.ExtendedEntities.Media[0], ret.ID)
ret.Videos = []Video{new_video}
ret.Images = []Image{}
}
return

View File

@ -2,17 +2,39 @@ package scraper
import (
"fmt"
"sort"
)
type VideoID int
type VideoID int64
// TODO video-source-user: extract source user information (e.g., someone shares a video
// from someone else).
type Video struct {
ID VideoID
TweetID TweetID
Filename string
Filename string // TODO video-filename: delete when it all works
RemoteURL string
LocalFilename string
IsDownloaded bool
}
func (v Video) FilenameWhenDownloaded() string {
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
variants := apiVideo.VideoInfo.Variants
sort.Sort(variants)
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
return Video{
ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Filename: variants[0].URL,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
IsDownloaded: false,
}
}
func (v Video) FilenameWhenDownloaded() string { // TODO video-filename: delete whole method and associated test
return fmt.Sprintf("%d.mp4", v.TweetID)
}
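
A hypothetical usage sketch, not part of the commit, showing what ParseAPIVideo produces: SortableVariants orders by descending Bitrate, so the first variant after sorting is the highest-bitrate rendition. All literal values are made up:

func parseAPIVideoExample() Video {
    var media APIExtendedMedia
    media.ID = 1453
    media.Type = "video"
    media.VideoInfo.Variants = SortableVariants{
        {Bitrate: 632000, URL: "https://video.twimg.com/low.mp4"},
        {Bitrate: 2176000, URL: "https://video.twimg.com/high.mp4"},
    }
    vid := ParseAPIVideo(media, TweetID(1453)) // hypothetical tweet ID
    // vid.Filename and vid.RemoteURL are the 2176000-bitrate URL;
    // vid.LocalFilename is "1453.mp4"; vid.ID is VideoID(1453).
    return vid
}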