Make videos their own table; store as a slice on Tweet rather than text field
This commit is contained in:
parent
647dd8aa6b
commit
0c87428c44
@ -14,26 +14,28 @@ create table users (rowid integer primary key,
|
|||||||
is_verified boolean default 0,
|
is_verified boolean default 0,
|
||||||
profile_image_url text,
|
profile_image_url text,
|
||||||
banner_image_url text,
|
banner_image_url text,
|
||||||
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = '')
|
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
|
||||||
|
|
||||||
|
is_content_downloaded boolean default 0
|
||||||
|
|
||||||
-- foreign key(pinned_tweet_id) references tweets(id)
|
-- foreign key(pinned_tweet_id) references tweets(id)
|
||||||
);
|
);
|
||||||
|
|
||||||
create table tweets (rowid integer primary key,
|
create table tweets (rowid integer primary key,
|
||||||
id integer unique not null check(typeof(id) = 'integer'),
|
id integer unique not null check(typeof(id) = 'integer'),
|
||||||
user_id integer not null check(typeof(id) = 'integer'),
|
user_id integer not null check(typeof(user_id) = 'integer'),
|
||||||
text text not null,
|
text text not null,
|
||||||
posted_at integer,
|
posted_at integer,
|
||||||
num_likes integer,
|
num_likes integer,
|
||||||
num_retweets integer,
|
num_retweets integer,
|
||||||
num_replies integer,
|
num_replies integer,
|
||||||
num_quote_tweets integer,
|
num_quote_tweets integer,
|
||||||
video_url text,
|
|
||||||
in_reply_to integer,
|
in_reply_to integer,
|
||||||
quoted_tweet integer,
|
quoted_tweet integer,
|
||||||
mentions text, -- comma-separated
|
mentions text, -- comma-separated
|
||||||
hashtags text, -- comma-separated
|
hashtags text, -- comma-separated
|
||||||
|
|
||||||
|
is_content_downloaded boolean default 0,
|
||||||
foreign key(user_id) references users(id)
|
foreign key(user_id) references users(id)
|
||||||
-- foreign key(in_reply_to) references tweets(id),
|
-- foreign key(in_reply_to) references tweets(id),
|
||||||
-- foreign key(quoted_tweet) references tweets(id)
|
-- foreign key(quoted_tweet) references tweets(id)
|
||||||
@ -59,6 +61,16 @@ create table urls (rowid integer primary key,
|
|||||||
create table images (rowid integer primary key,
|
create table images (rowid integer primary key,
|
||||||
tweet_id integer not null,
|
tweet_id integer not null,
|
||||||
filename text not null,
|
filename text not null,
|
||||||
|
is_downloaded,
|
||||||
|
|
||||||
|
unique (tweet_id, filename)
|
||||||
|
foreign key(tweet_id) references tweets(id)
|
||||||
|
);
|
||||||
|
|
||||||
|
create table videos (rowid integer primary key,
|
||||||
|
tweet_id integer not null,
|
||||||
|
filename text not null,
|
||||||
|
is_downloaded,
|
||||||
|
|
||||||
unique (tweet_id, filename)
|
unique (tweet_id, filename)
|
||||||
foreign key(tweet_id) references tweets(id)
|
foreign key(tweet_id) references tweets(id)
|
||||||
|
@ -17,15 +17,15 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
_, err = db.Exec(`
|
_, err = db.Exec(`
|
||||||
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, video_url, in_reply_to, quoted_tweet, mentions, hashtags)
|
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to, quoted_tweet, mentions, hashtags)
|
||||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
on conflict do update
|
on conflict do update
|
||||||
set num_likes=?,
|
set num_likes=?,
|
||||||
num_retweets=?,
|
num_retweets=?,
|
||||||
num_replies=?,
|
num_replies=?,
|
||||||
num_quote_tweets=?
|
num_quote_tweets=?
|
||||||
`,
|
`,
|
||||||
t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.Video, t.InReplyTo, t.QuotedTweet, scraper.JoinArrayOfHandles(t.Mentions), strings.Join(t.Hashtags, ","),
|
t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyTo, t.QuotedTweet, scraper.JoinArrayOfHandles(t.Mentions), strings.Join(t.Hashtags, ","),
|
||||||
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets,
|
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -44,12 +44,19 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, video := range t.Videos {
|
||||||
|
_, err := db.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, video)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
for _, hashtag := range t.Hashtags {
|
for _, hashtag := range t.Hashtags {
|
||||||
_, err := db.Exec("insert into hashtags (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, hashtag)
|
_, err := db.Exec("insert into hashtags (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, hashtag)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = tx.Commit()
|
err = tx.Commit()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -96,6 +103,30 @@ func (p Profile) attach_images(t *scraper.Tweet) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p Profile) attach_videos(t *scraper.Tweet) error {
|
||||||
|
println("Attaching videos")
|
||||||
|
stmt, err := p.DB.Prepare("select filename from videos where tweet_id = ?")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer stmt.Close()
|
||||||
|
rows, err := stmt.Query(t.ID)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var video string
|
||||||
|
for rows.Next() {
|
||||||
|
err = rows.Scan(&video)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
println(video)
|
||||||
|
t.Videos = append(t.Videos, video)
|
||||||
|
fmt.Printf("%v\n", t.Videos)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p Profile) attach_urls(t *scraper.Tweet) error {
|
func (p Profile) attach_urls(t *scraper.Tweet) error {
|
||||||
println("Attaching urls")
|
println("Attaching urls")
|
||||||
stmt, err := p.DB.Prepare("select text from urls where tweet_id = ?")
|
stmt, err := p.DB.Prepare("select text from urls where tweet_id = ?")
|
||||||
@ -124,7 +155,7 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
db := p.DB
|
db := p.DB
|
||||||
|
|
||||||
stmt, err := db.Prepare(`
|
stmt, err := db.Prepare(`
|
||||||
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, video_url, in_reply_to, quoted_tweet, mentions, hashtags
|
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to, quoted_tweet, mentions, hashtags
|
||||||
from tweets
|
from tweets
|
||||||
where id = ?
|
where id = ?
|
||||||
`)
|
`)
|
||||||
@ -142,7 +173,7 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
var user_id int64
|
var user_id int64
|
||||||
|
|
||||||
row := stmt.QueryRow(id)
|
row := stmt.QueryRow(id)
|
||||||
err = row.Scan(&tweet_id, &user_id, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.Video, &t.InReplyTo, &t.QuotedTweet, &mentions, &hashtags)
|
err = row.Scan(&tweet_id, &user_id, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyTo, &t.QuotedTweet, &mentions, &hashtags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return t, err
|
return t, err
|
||||||
}
|
}
|
||||||
@ -159,6 +190,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return t, err
|
return t, err
|
||||||
}
|
}
|
||||||
|
err = p.attach_videos(&t)
|
||||||
|
if err != nil {
|
||||||
|
return t, err
|
||||||
|
}
|
||||||
err = p.attach_urls(&t)
|
err = p.attach_urls(&t)
|
||||||
return t, err
|
return t, err
|
||||||
}
|
}
|
||||||
|
@ -28,13 +28,13 @@ func TestSaveAndLoadTweet(t *testing.T) {
|
|||||||
// Save the tweet
|
// Save the tweet
|
||||||
err = profile.SaveTweet(tweet)
|
err = profile.SaveTweet(tweet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Failed to save the tweet: %s", err.Error())
|
t.Fatalf("Failed to save the tweet: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reload the tweet
|
// Reload the tweet
|
||||||
new_tweet, err := profile.GetTweetById(tweet.ID)
|
new_tweet, err := profile.GetTweetById(tweet.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Failed to load the tweet: %s", err.Error())
|
t.Fatalf("Failed to load the tweet: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
if diff := deep.Equal(tweet, new_tweet); diff != nil {
|
if diff := deep.Equal(tweet, new_tweet); diff != nil {
|
||||||
|
@ -69,7 +69,7 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
NumRetweets: 2,
|
NumRetweets: 2,
|
||||||
NumReplies: 3,
|
NumReplies: 3,
|
||||||
NumQuoteTweets: 4,
|
NumQuoteTweets: 4,
|
||||||
Video: "video",
|
Videos: []string{"video"},
|
||||||
Urls: []string{"url1", "url2"},
|
Urls: []string{"url1", "url2"},
|
||||||
Images: []string{"image1", "image2"},
|
Images: []string{"image1", "image2"},
|
||||||
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
||||||
|
@ -22,11 +22,11 @@ type Tweet struct {
|
|||||||
NumRetweets int
|
NumRetweets int
|
||||||
NumReplies int
|
NumReplies int
|
||||||
NumQuoteTweets int
|
NumQuoteTweets int
|
||||||
Video string
|
|
||||||
InReplyTo TweetID
|
InReplyTo TweetID
|
||||||
|
|
||||||
Urls []string
|
Urls []string
|
||||||
Images []string
|
Images []string
|
||||||
|
Videos []string
|
||||||
Mentions []UserHandle
|
Mentions []UserHandle
|
||||||
Hashtags []string
|
Hashtags []string
|
||||||
QuotedTweet TweetID
|
QuotedTweet TweetID
|
||||||
@ -116,7 +116,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
}
|
}
|
||||||
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
|
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
|
||||||
sort.Sort(variants)
|
sort.Sort(variants)
|
||||||
ret.Video = variants[0].URL
|
ret.Videos = []string{variants[0].URL}
|
||||||
ret.Images = []string{}
|
ret.Images = []string{}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
@ -153,8 +153,8 @@ func TestParseTweetWithVideo(t *testing.T) {
|
|||||||
t.Errorf(err.Error())
|
t.Errorf(err.Error())
|
||||||
}
|
}
|
||||||
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
||||||
if tweet.Video != expected_video {
|
if len(tweet.Videos) != 1 || tweet.Videos[0] != expected_video {
|
||||||
t.Errorf("Expected video %q, but got %q", expected_video, tweet.Video)
|
t.Errorf("Expected video %q, but got %+v", expected_video, tweet.Videos)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tweet.Images) != 0 {
|
if len(tweet.Images) != 0 {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user