Use VideoID given by the Twitter API instead of a rowid
This commit is contained in:
parent
e06bb4dc9a
commit
58dc223f84
@ -1,7 +1,7 @@
|
|||||||
package persistence
|
package persistence
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
"fmt"
|
||||||
|
|
||||||
"offline_twitter/scraper"
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -26,22 +26,22 @@ func (p Profile) SaveImage(img scraper.Image) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save a Video. If it's a new Video (no rowid), does an insert; otherwise, does an update.
|
* Save a Video
|
||||||
*
|
*
|
||||||
* args:
|
* args:
|
||||||
* - vid: the Video to save
|
* - vid: the Video to save
|
||||||
*
|
|
||||||
* returns:
|
|
||||||
* - the rowid
|
|
||||||
*/
|
*/
|
||||||
func (p Profile) SaveVideo(vid scraper.Video) (sql.Result, error) {
|
func (p Profile) SaveVideo(vid scraper.Video) error {
|
||||||
if vid.ID == 0 {
|
_, err := p.DB.Exec(`
|
||||||
// New image
|
insert into videos (id, tweet_id, filename, is_downloaded)
|
||||||
return p.DB.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", vid.TweetID, vid.Filename)
|
values (?, ?, ?, ?)
|
||||||
} else {
|
on conflict do update
|
||||||
// Updating an existing image
|
set is_downloaded=?
|
||||||
return p.DB.Exec("update videos set filename=?, is_downloaded=? where rowid=?", vid.Filename, vid.IsDownloaded, vid.ID)
|
`,
|
||||||
}
|
vid.ID, vid.TweetID, vid.Filename, vid.IsDownloaded,
|
||||||
|
vid.IsDownloaded,
|
||||||
|
)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -75,7 +75,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
|
|||||||
* Get the list of videos for a tweet
|
* Get the list of videos for a tweet
|
||||||
*/
|
*/
|
||||||
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
||||||
stmt, err := p.DB.Prepare("select rowid, filename, is_downloaded from videos where tweet_id=?")
|
stmt, err := p.DB.Prepare("select id, filename, is_downloaded from videos where tweet_id=?")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,6 @@ package persistence_test
|
|||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"fmt"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
@ -101,19 +100,14 @@ func TestSaveAndLoadVideo(t *testing.T) {
|
|||||||
|
|
||||||
// Create a fresh Video to test on
|
// Create a fresh Video to test on
|
||||||
rand.Seed(time.Now().UnixNano())
|
rand.Seed(time.Now().UnixNano())
|
||||||
filename := fmt.Sprint(rand.Int())
|
vid := create_video_from_id(rand.Int())
|
||||||
vid := scraper.Video{TweetID: tweet.ID, Filename: filename, IsDownloaded: false}
|
vid.TweetID = tweet.ID
|
||||||
|
|
||||||
// Save the Video
|
// Save the Video
|
||||||
result, err := profile.SaveVideo(vid)
|
err := profile.SaveVideo(vid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to save the video: %s", err.Error())
|
t.Fatalf("Failed to save the video: %s", err.Error())
|
||||||
}
|
}
|
||||||
last_insert, err := result.LastInsertId()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("last insert??? %s", err.Error())
|
|
||||||
}
|
|
||||||
vid.ID = scraper.VideoID(last_insert)
|
|
||||||
|
|
||||||
// Reload the Video
|
// Reload the Video
|
||||||
vids, err := profile.GetVideosForTweet(tweet)
|
vids, err := profile.GetVideosForTweet(tweet)
|
||||||
@ -145,25 +139,17 @@ func TestModifyVideo(t *testing.T) {
|
|||||||
tweet := create_stable_tweet()
|
tweet := create_stable_tweet()
|
||||||
vid := tweet.Videos[0]
|
vid := tweet.Videos[0]
|
||||||
|
|
||||||
if vid.ID != 1 {
|
if vid.ID != -1 {
|
||||||
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, vid.ID)
|
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, vid.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
vid.Filename = "local/sdfjk.jpg"
|
|
||||||
vid.IsDownloaded = true
|
vid.IsDownloaded = true
|
||||||
|
|
||||||
// Save the changes
|
// Save the changes
|
||||||
result, err := profile.SaveVideo(vid)
|
err := profile.SaveVideo(vid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
rows_affected, err := result.RowsAffected()
|
|
||||||
if err != nil {
|
|
||||||
t.Error(err)
|
|
||||||
}
|
|
||||||
if rows_affected != 1 {
|
|
||||||
t.Errorf("Expected 1 row changed, but got %d", rows_affected)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reload it
|
// Reload it
|
||||||
vids, err := profile.GetVideosForTweet(tweet)
|
vids, err := profile.GetVideosForTweet(tweet)
|
||||||
@ -172,7 +158,7 @@ func TestModifyVideo(t *testing.T) {
|
|||||||
}
|
}
|
||||||
new_vid := vids[0]
|
new_vid := vids[0]
|
||||||
if new_vid.ID != vid.ID {
|
if new_vid.ID != vid.ID {
|
||||||
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", 1, new_vid.ID)
|
t.Fatalf("Got the wrong video back: wanted ID %d, got %d", -1, new_vid.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
if diff := deep.Equal(vid, new_vid); diff != nil {
|
if diff := deep.Equal(vid, new_vid); diff != nil {
|
||||||
|
@ -68,6 +68,7 @@ create table images (rowid integer primary key,
|
|||||||
);
|
);
|
||||||
|
|
||||||
create table videos (rowid integer primary key,
|
create table videos (rowid integer primary key,
|
||||||
|
id integer unique not null check(typeof(id) = 'integer'),
|
||||||
tweet_id integer not null,
|
tweet_id integer not null,
|
||||||
filename text not null unique,
|
filename text not null unique,
|
||||||
is_downloaded boolean default 0,
|
is_downloaded boolean default 0,
|
||||||
|
@ -45,7 +45,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, video := range t.Videos {
|
for _, video := range t.Videos {
|
||||||
_, err := p.SaveVideo(video)
|
err := p.SaveVideo(video)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -28,10 +28,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
|
|||||||
t.Fatalf("Failed to load the tweet: %s", err.Error())
|
t.Fatalf("Failed to load the tweet: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range tweet.Videos {
|
|
||||||
tweet.Videos[i].ID = new_tweet.Videos[i].ID
|
|
||||||
}
|
|
||||||
|
|
||||||
if diff := deep.Equal(tweet, new_tweet); diff != nil {
|
if diff := deep.Equal(tweet, new_tweet); diff != nil {
|
||||||
t.Error(diff)
|
t.Error(diff)
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,6 @@ func parse_user_from_row(row *sql.Row) (scraper.User, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return u, err
|
return u, err
|
||||||
}
|
}
|
||||||
|
|
||||||
u.JoinDate = time.Unix(joinDate, 0)
|
u.JoinDate = time.Unix(joinDate, 0)
|
||||||
|
|
||||||
return u, nil
|
return u, nil
|
||||||
|
@ -59,7 +59,7 @@ func create_stable_user() scraper.User {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a semi-stable image based on the given ID
|
* Create a semi-stable Image based on the given ID
|
||||||
*/
|
*/
|
||||||
func create_image_from_id(id int) scraper.Image {
|
func create_image_from_id(id int) scraper.Image {
|
||||||
filename := fmt.Sprintf("image%d.jpg", id)
|
filename := fmt.Sprintf("image%d.jpg", id)
|
||||||
@ -71,6 +71,19 @@ func create_image_from_id(id int) scraper.Image {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a semi-stable Video based on the given ID
|
||||||
|
*/
|
||||||
|
func create_video_from_id(id int) scraper.Video {
|
||||||
|
filename := fmt.Sprintf("video%d.jpg", id)
|
||||||
|
return scraper.Video{
|
||||||
|
ID: scraper.VideoID(id),
|
||||||
|
TweetID: -1,
|
||||||
|
Filename: filename,
|
||||||
|
IsDownloaded: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a stable tweet with a fixed ID and content
|
* Create a stable tweet with a fixed ID and content
|
||||||
*/
|
*/
|
||||||
@ -85,7 +98,9 @@ func create_stable_tweet() scraper.Tweet {
|
|||||||
NumRetweets: 10,
|
NumRetweets: 10,
|
||||||
NumReplies: 10,
|
NumReplies: 10,
|
||||||
NumQuoteTweets: 10,
|
NumQuoteTweets: 10,
|
||||||
Videos: []scraper.Video{{ID: scraper.VideoID(1), TweetID: tweet_id, Filename: "asdf", IsDownloaded: false}},
|
Videos: []scraper.Video{
|
||||||
|
create_video_from_id(-1),
|
||||||
|
},
|
||||||
Urls: []string{},
|
Urls: []string{},
|
||||||
Images: []scraper.Image{
|
Images: []scraper.Image{
|
||||||
create_image_from_id(-1),
|
create_image_from_id(-1),
|
||||||
@ -133,6 +148,8 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
img1.TweetID = tweet_id
|
img1.TweetID = tweet_id
|
||||||
img2 := create_image_from_id(rand.Int())
|
img2 := create_image_from_id(rand.Int())
|
||||||
img2.TweetID = tweet_id
|
img2.TweetID = tweet_id
|
||||||
|
vid := create_video_from_id(rand.Int())
|
||||||
|
vid.TweetID = tweet_id
|
||||||
|
|
||||||
return scraper.Tweet{
|
return scraper.Tweet{
|
||||||
ID: tweet_id,
|
ID: tweet_id,
|
||||||
@ -143,7 +160,7 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
NumRetweets: 2,
|
NumRetweets: 2,
|
||||||
NumReplies: 3,
|
NumReplies: 3,
|
||||||
NumQuoteTweets: 4,
|
NumQuoteTweets: 4,
|
||||||
Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video" + fmt.Sprint(tweet_id), IsDownloaded: false}},
|
Videos: []scraper.Video{vid},
|
||||||
Urls: []string{"url1", "url2"},
|
Urls: []string{"url1", "url2"},
|
||||||
Images: []scraper.Image{img1, img2},
|
Images: []scraper.Image{img1, img2},
|
||||||
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
||||||
|
@ -7,6 +7,14 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
type APIMedia struct {
|
||||||
|
ID int64 `json:"id_str,string"`
|
||||||
|
MediaURLHttps string `json:"media_url_https"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
}
|
||||||
|
|
||||||
type SortableVariants []struct {
|
type SortableVariants []struct {
|
||||||
Bitrate int `json:"bitrate,omitempty"`
|
Bitrate int `json:"bitrate,omitempty"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
@ -15,11 +23,13 @@ func (v SortableVariants) Len() int { return len(v) }
|
|||||||
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
||||||
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
|
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
|
||||||
|
|
||||||
type APIMedia struct {
|
type APIExtendedMedia struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
MediaURLHttps string `json:"media_url_https"`
|
MediaURLHttps string `json:"media_url_https"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
URL string `json:"url"`
|
VideoInfo struct {
|
||||||
|
Variants SortableVariants `json:"variants"`
|
||||||
|
} `json:"video_info"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type APITweet struct {
|
type APITweet struct {
|
||||||
@ -43,14 +53,7 @@ type APITweet struct {
|
|||||||
} `json:"user_mentions"`
|
} `json:"user_mentions"`
|
||||||
} `json:"entities"`
|
} `json:"entities"`
|
||||||
ExtendedEntities struct {
|
ExtendedEntities struct {
|
||||||
Media []struct {
|
Media []APIExtendedMedia `json:"media"`
|
||||||
IDStr string `json:"id_str"`
|
|
||||||
MediaURLHttps string `json:"media_url_https"`
|
|
||||||
Type string `json:"type"`
|
|
||||||
VideoInfo struct {
|
|
||||||
Variants SortableVariants `json:"variants"`
|
|
||||||
} `json:"video_info"`
|
|
||||||
} `json:"media"`
|
|
||||||
} `json:"extended_entities"`
|
} `json:"extended_entities"`
|
||||||
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
||||||
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
||||||
@ -123,7 +126,7 @@ type APIUser struct {
|
|||||||
ListedCount int `json:"listed_count"`
|
ListedCount int `json:"listed_count"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Location string `json:"location"`
|
Location string `json:"location"`
|
||||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
|
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
|
||||||
ProfileBannerURL string `json:"profile_banner_url"`
|
ProfileBannerURL string `json:"profile_banner_url"`
|
||||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||||
Protected bool `json:"protected"`
|
Protected bool `json:"protected"`
|
||||||
|
@ -32,6 +32,7 @@ func TestNormalizeContent(t *testing.T) {
|
|||||||
var tweet scraper.APITweet
|
var tweet scraper.APITweet
|
||||||
err = json.Unmarshal(data, &tweet)
|
err = json.Unmarshal(data, &tweet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
println("Failed at " + v.filename)
|
||||||
t.Errorf(err.Error())
|
t.Errorf(err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
|
|||||||
local_filename := path.Base(apiMedia.MediaURLHttps)
|
local_filename := path.Base(apiMedia.MediaURLHttps)
|
||||||
return Image{
|
return Image{
|
||||||
ID: ImageID(apiMedia.ID),
|
ID: ImageID(apiMedia.ID),
|
||||||
Filename: apiMedia.MediaURLHttps, // XXX filename
|
Filename: apiMedia.MediaURLHttps, // TODO filename
|
||||||
RemoteURL: apiMedia.MediaURLHttps,
|
RemoteURL: apiMedia.MediaURLHttps,
|
||||||
LocalFilename: local_filename,
|
LocalFilename: local_filename,
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
|
@ -3,7 +3,6 @@ package scraper
|
|||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
|
||||||
|
|
||||||
"offline_twitter/terminal_utils"
|
"offline_twitter/terminal_utils"
|
||||||
)
|
)
|
||||||
@ -116,9 +115,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
if len(apiTweet.ExtendedEntities.Media) != 1 {
|
if len(apiTweet.ExtendedEntities.Media) != 1 {
|
||||||
panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
|
panic(fmt.Sprintf("Surprising ExtendedEntities: %v", apiTweet.ExtendedEntities.Media))
|
||||||
}
|
}
|
||||||
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
|
new_video := ParseAPIVideo(apiTweet.ExtendedEntities.Media[0], ret.ID)
|
||||||
sort.Sort(variants)
|
ret.Videos = []Video{new_video}
|
||||||
ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
|
|
||||||
ret.Images = []Image{}
|
ret.Images = []Image{}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
@ -2,17 +2,39 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
type VideoID int
|
type VideoID int64
|
||||||
|
|
||||||
|
// TODO video-source-user: extract source user information (e.g., someone shares a video
|
||||||
|
// from someone else).
|
||||||
|
|
||||||
type Video struct {
|
type Video struct {
|
||||||
ID VideoID
|
ID VideoID
|
||||||
TweetID TweetID
|
TweetID TweetID
|
||||||
Filename string
|
Filename string // TODO video-filename: delete when it all works
|
||||||
|
RemoteURL string
|
||||||
|
LocalFilename string
|
||||||
IsDownloaded bool
|
IsDownloaded bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (v Video) FilenameWhenDownloaded() string {
|
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||||
|
variants := apiVideo.VideoInfo.Variants
|
||||||
|
sort.Sort(variants)
|
||||||
|
|
||||||
|
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
|
||||||
|
|
||||||
|
return Video{
|
||||||
|
ID: VideoID(apiVideo.ID),
|
||||||
|
TweetID: tweet_id,
|
||||||
|
Filename: variants[0].URL,
|
||||||
|
RemoteURL: variants[0].URL,
|
||||||
|
LocalFilename: local_filename,
|
||||||
|
IsDownloaded: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (v Video) FilenameWhenDownloaded() string { // TODO video-filename: delete whole method and associated test
|
||||||
return fmt.Sprintf("%d.mp4", v.TweetID)
|
return fmt.Sprintf("%d.mp4", v.TweetID)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user