Use VideoID given by the Twitter API instead of a rowid
This commit is contained in:
parent e06bb4dc9a
commit 58dc223f84
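The change, in brief: SaveVideo stops branching on whether a rowid already exists and instead does a single upsert keyed on the Twitter-assigned video id; the videos table gains an id column for that value; and a new ParseAPIVideo helper carries the id from the API payload into the Video struct. Below is a minimal, self-contained sketch of that upsert pattern. The table, column names, and SQL come from the diff; the go-sqlite3 driver and the literal id values are assumptions for illustration only.

package main

import (
    "database/sql"
    "fmt"
    "log"

    _ "github.com/mattn/go-sqlite3" // assumed driver; any database/sql SQLite driver works
)

const (
    videoID int64 = 1418880944608817153 // made-up Twitter media id
    tweetID int64 = 1418880893102792705 // made-up tweet id
)

func main() {
    db, err := sql.Open("sqlite3", ":memory:")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    // Trimmed version of the new `videos` table from the schema hunk below.
    _, err = db.Exec(`create table videos (rowid integer primary key,
        id integer unique not null check(typeof(id) = 'integer'),
        tweet_id integer not null,
        filename text not null unique,
        is_downloaded boolean default 0)`)
    if err != nil {
        log.Fatal(err)
    }

    // Same statement the new SaveVideo uses: one upsert keyed on the unique `id`.
    // (Omitting the conflict target with DO UPDATE needs SQLite 3.35+.)
    save := func(isDownloaded bool) error {
        _, err := db.Exec(`
            insert into videos (id, tweet_id, filename, is_downloaded)
            values (?, ?, ?, ?)
            on conflict do update
            set is_downloaded=?
            `,
            videoID, tweetID, "video.mp4", isDownloaded,
            isDownloaded,
        )
        return err
    }

    if err := save(false); err != nil { // first call inserts the row
        log.Fatal(err)
    }
    if err := save(true); err != nil { // second call conflicts on id and just flips the flag
        log.Fatal(err)
    }

    var downloaded bool
    if err := db.QueryRow("select is_downloaded from videos where id = ?", videoID).Scan(&downloaded); err != nil {
        log.Fatal(err)
    }
    fmt.Println("is_downloaded:", downloaded) // true
}

Because the second call only updates is_downloaded on conflict, the tests no longer need the LastInsertId/RowsAffected bookkeeping that the old rowid-based SaveVideo required.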
@@ -1,7 +1,7 @@
package persistence

import (
    "database/sql"
    "fmt"

    "offline_twitter/scraper"
)

@@ -26,22 +26,22 @@ func (p Profile) SaveImage(img scraper.Image) error {
}

/**
 * Save a Video. If it's a new Video (no rowid), does an insert; otherwise, does an update.
 * Save a Video
 *
 * args:
 * - img: the Video to save
 *
 * returns:
 * - the rowid
 */
func (p Profile) SaveVideo(vid scraper.Video) (sql.Result, error) {
    if vid.ID == 0 {
        // New image
        return p.DB.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", vid.TweetID, vid.Filename)
    } else {
        // Updating an existing image
        return p.DB.Exec("update videos set filename=?, is_downloaded=? where rowid=?", vid.Filename, vid.IsDownloaded, vid.ID)
    }
func (p Profile) SaveVideo(vid scraper.Video) error {
    _, err := p.DB.Exec(`
        insert into videos (id, tweet_id, filename, is_downloaded)
        values (?, ?, ?, ?)
        on conflict do update
        set is_downloaded=?
        `,
        vid.ID, vid.TweetID, vid.Filename, vid.IsDownloaded,
        vid.IsDownloaded,
    )
    return err
}

/**

@@ -75,7 +75,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
 * Get the list of videos for a tweet
 */
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
    stmt, err := p.DB.Prepare("select rowid, filename, is_downloaded from videos where tweet_id=?")
    stmt, err := p.DB.Prepare("select id, filename, is_downloaded from videos where tweet_id=?")
    if err != nil {
        return
    }
@@ -3,7 +3,6 @@ package persistence_test
import (
    "testing"
    "math/rand"
    "fmt"
    "time"

    "github.com/go-test/deep"

@@ -101,19 +100,14 @@ func TestSaveAndLoadVideo(t *testing.T) {

    // Create a fresh Video to test on
    rand.Seed(time.Now().UnixNano())
    filename := fmt.Sprint(rand.Int())
    vid := scraper.Video{TweetID: tweet.ID, Filename: filename, IsDownloaded: false}
    vid := create_video_from_id(rand.Int())
    vid.TweetID = tweet.ID

    // Save the Video
    result, err := profile.SaveVideo(vid)
    err := profile.SaveVideo(vid)
    if err != nil {
        t.Fatalf("Failed to save the video: %s", err.Error())
    }
    last_insert, err := result.LastInsertId()
    if err != nil {
        t.Fatalf("last insert??? %s", err.Error())
    }
    vid.ID = scraper.VideoID(last_insert)

    // Reload the Video
    vids, err := profile.GetVideosForTweet(tweet)

@@ -145,25 +139,17 @@ func TestModifyVideo(t *testing.T) {
    tweet := create_stable_tweet()
    vid := tweet.Videos[0]

    if vid.ID != 1 {
        t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, vid.ID)
    if vid.ID != -1 {
        t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, vid.ID)
    }

    vid.Filename = "local/sdfjk.jpg"
    vid.IsDownloaded = true

    // Save the changes
    result, err := profile.SaveVideo(vid)
    err := profile.SaveVideo(vid)
    if err != nil {
        t.Error(err)
    }
    rows_affected, err := result.RowsAffected()
    if err != nil {
        t.Error(err)
    }
    if rows_affected != 1 {
        t.Errorf("Expected 1 row changed, but got %d", rows_affected)
    }

    // Reload it
    vids, err := profile.GetVideosForTweet(tweet)

@@ -172,7 +158,7 @@ func TestModifyVideo(t *testing.T) {
    }
    new_vid := vids[0]
    if new_vid.ID != vid.ID {
        t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, new_vid.ID)
        t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, new_vid.ID)
    }

    if diff := deep.Equal(vid, new_vid); diff != nil {
@@ -68,6 +68,7 @@ create table images (rowid integer primary key,
);

create table videos (rowid integer primary key,
    id integer unique not null check(typeof(id) = 'integer'),
    tweet_id integer not null,
    filename text not null unique,
    is_downloaded boolean default 0,
@@ -45,7 +45,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
        }
    }
    for _, video := range t.Videos {
        _, err := p.SaveVideo(video)
        err := p.SaveVideo(video)
        if err != nil {
            return err
        }
@@ -28,10 +28,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
        t.Fatalf("Failed to load the tweet: %s", err.Error())
    }

    for i := range tweet.Videos {
        tweet.Videos[i].ID = new_tweet.Videos[i].ID
    }

    if diff := deep.Equal(tweet, new_tweet); diff != nil {
        t.Error(diff)
    }
@@ -85,7 +85,6 @@ func parse_user_from_row(row *sql.Row) (scraper.User, error) {
    if err != nil {
        return u, err
    }

    u.JoinDate = time.Unix(joinDate, 0)

    return u, nil
@@ -59,7 +59,7 @@ func create_stable_user() scraper.User {
}

/**
 * Create a semi-stable image based on the given ID
 * Create a semi-stable Image based on the given ID
 */
func create_image_from_id(id int) scraper.Image {
    filename := fmt.Sprintf("image%d.jpg", id)

@@ -71,6 +71,19 @@ func create_image_from_id(id int) scraper.Image {
    }
}

/**
 * Create a semi-stable Video based on the given ID
 */
func create_video_from_id(id int) scraper.Video {
    filename := fmt.Sprintf("video%d.jpg", id)
    return scraper.Video{
        ID: scraper.VideoID(id),
        TweetID: -1,
        Filename: filename,
        IsDownloaded: false,
    }
}

/**
 * Create a stable tweet with a fixed ID and content
 */

@@ -85,7 +98,9 @@ func create_stable_tweet() scraper.Tweet {
        NumRetweets: 10,
        NumReplies: 10,
        NumQuoteTweets: 10,
        Videos: []scraper.Video{{ID: scraper.VideoID(1), TweetID: tweet_id, Filename: "asdf", IsDownloaded: false}},
        Videos: []scraper.Video{
            create_video_from_id(-1),
        },
        Urls: []string{},
        Images: []scraper.Image{
            create_image_from_id(-1),

@@ -133,6 +148,8 @@ func create_dummy_tweet() scraper.Tweet {
    img1.TweetID = tweet_id
    img2 := create_image_from_id(rand.Int())
    img2.TweetID = tweet_id
    vid := create_video_from_id(rand.Int())
    vid.TweetID = tweet_id

    return scraper.Tweet{
        ID: tweet_id,

@@ -143,7 +160,7 @@ func create_dummy_tweet() scraper.Tweet {
        NumRetweets: 2,
        NumReplies: 3,
        NumQuoteTweets: 4,
        Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video" + fmt.Sprint(tweet_id), IsDownloaded: false}},
        Videos: []scraper.Video{vid},
        Urls: []string{"url1", "url2"},
        Images: []scraper.Image{img1, img2},
        Mentions: []scraper.UserHandle{"mention1", "mention2"},
@@ -7,6 +7,14 @@ import (
    "strconv"
)

type APIMedia struct {
    ID int64 `json:"id_str,string"`
    MediaURLHttps string `json:"media_url_https"`
    Type string `json:"type"`
    URL string `json:"url"`
}

type SortableVariants []struct {
    Bitrate int `json:"bitrate,omitempty"`
    URL string `json:"url"`

@@ -15,11 +23,13 @@ func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }

type APIMedia struct {
type APIExtendedMedia struct {
    ID int64 `json:"id_str,string"`
    MediaURLHttps string `json:"media_url_https"`
    Type string `json:"type"`
    URL string `json:"url"`
    VideoInfo struct {
        Variants SortableVariants `json:"variants"`
    } `json:"video_info"`
}

type APITweet struct {

@@ -43,14 +53,7 @@ type APITweet struct {
        } `json:"user_mentions"`
    } `json:"entities"`
    ExtendedEntities struct {
        Media []struct {
            IDStr string `json:"id_str"`
            MediaURLHttps string `json:"media_url_https"`
            Type string `json:"type"`
            VideoInfo struct {
                Variants SortableVariants `json:"variants"`
            } `json:"video_info"`
        } `json:"media"`
        Media []APIExtendedMedia `json:"media"`
    } `json:"extended_entities"`
    InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
    InReplyToScreenName string `json:"in_reply_to_screen_name"`

@@ -123,7 +126,7 @@ type APIUser struct {
    ListedCount int `json:"listed_count"`
    Name string `json:"name"`
    Location string `json:"location"`
    PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
    PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
    ProfileBannerURL string `json:"profile_banner_url"`
    ProfileImageURLHTTPS string `json:"profile_image_url_https"`
    Protected bool `json:"protected"`
@@ -32,6 +32,7 @@ func TestNormalizeContent(t *testing.T) {
    var tweet scraper.APITweet
    err = json.Unmarshal(data, &tweet)
    if err != nil {
        println("Failed at " + v.filename)
        t.Errorf(err.Error())
    }

@@ -19,7 +19,7 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
    local_filename := path.Base(apiMedia.MediaURLHttps)
    return Image{
        ID: ImageID(apiMedia.ID),
        Filename: apiMedia.MediaURLHttps, // XXX filename
        Filename: apiMedia.MediaURLHttps, // TODO filename
        RemoteURL: apiMedia.MediaURLHttps,
        LocalFilename: local_filename,
        IsDownloaded: false,
@@ -3,7 +3,6 @@ package scraper
import (
    "time"
    "fmt"
    "sort"

    "offline_twitter/terminal_utils"
)

@@ -116,9 +115,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
        if len(apiTweet.ExtendedEntities.Media) != 1 {
            panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
        }
        variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
        sort.Sort(variants)
        ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
        new_video := ParseAPIVideo(apiTweet.ExtendedEntities.Media[0], ret.ID)
        ret.Videos = []Video{new_video}
        ret.Images = []Image{}
    }
    return
@@ -2,17 +2,39 @@ package scraper

import (
    "fmt"
    "sort"
)

type VideoID int
type VideoID int64

// TODO video-source-user: extract source user information (e.g., someone shares a video
// from someone else).

type Video struct {
    ID VideoID
    TweetID TweetID
    Filename string
    Filename string // TODO video-filename: delete when it all works
    RemoteURL string
    LocalFilename string
    IsDownloaded bool
}

func (v Video) FilenameWhenDownloaded() string {
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
    variants := apiVideo.VideoInfo.Variants
    sort.Sort(variants)

    local_filename := fmt.Sprintf("%d.mp4", tweet_id)

    return Video{
        ID: VideoID(apiVideo.ID),
        TweetID: tweet_id,
        Filename: variants[0].URL,
        RemoteURL: variants[0].URL,
        LocalFilename: local_filename,
        IsDownloaded: false,
    }
}

func (v Video) FilenameWhenDownloaded() string { // TODO video-filename: delete whole method and associated test
    return fmt.Sprintf("%d.mp4", v.TweetID)
}
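For reference, a hedged sketch of how the new ParseAPIVideo might be exercised end to end. The JSON values and the tweet id below are invented for illustration; only the module path, struct tags, types, and function signature come from the diff above.

package main

import (
    "encoding/json"
    "fmt"
    "log"

    "offline_twitter/scraper"
)

func main() {
    // Hypothetical extended_entities media object; every value is made up.
    raw := []byte(`{
        "id_str": "1418880944608817153",
        "media_url_https": "https://pbs.twimg.com/ext_tw_video_thumb/thumb.jpg",
        "type": "video",
        "video_info": {"variants": [
            {"bitrate": 320000, "url": "https://video.twimg.com/low.mp4"},
            {"bitrate": 832000, "url": "https://video.twimg.com/high.mp4"}
        ]}
    }`)

    var media scraper.APIExtendedMedia
    if err := json.Unmarshal(raw, &media); err != nil {
        log.Fatal(err)
    }

    vid := scraper.ParseAPIVideo(media, scraper.TweetID(1418880893102792705))
    fmt.Println(vid.ID)        // the API-assigned media id, no longer a DB rowid
    fmt.Println(vid.RemoteURL) // highest-bitrate variant after sorting
}

The id_str from the payload ends up in Video.ID, which is exactly the value the new SaveVideo upserts on.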