From 81392996bb81ffec156dac03776b38c2dee113ef Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 26 Jul 2021 17:26:39 -0700 Subject: [PATCH] Add video and image types - in addition to `Filename` (string), includes IsDownloaded (bool) --- persistence/schema.sql | 4 ++-- persistence/tweet_queries.go | 30 +++++++++++++++++------------- persistence/utils_test.go | 11 +++++++---- scraper/image.go | 7 +++++++ scraper/tweet.go | 11 ++++++----- scraper/tweet_test.go | 2 +- scraper/video.go | 7 +++++++ 7 files changed, 47 insertions(+), 25 deletions(-) create mode 100644 scraper/image.go create mode 100644 scraper/video.go diff --git a/persistence/schema.sql b/persistence/schema.sql index 28a594d..8b2ad9c 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -61,7 +61,7 @@ create table urls (rowid integer primary key, create table images (rowid integer primary key, tweet_id integer not null, filename text not null, - is_downloaded, + is_downloaded boolean default 0, unique (tweet_id, filename) foreign key(tweet_id) references tweets(id) @@ -70,7 +70,7 @@ create table images (rowid integer primary key, create table videos (rowid integer primary key, tweet_id integer not null, filename text not null, - is_downloaded, + is_downloaded boolean default 0, unique (tweet_id, filename) foreign key(tweet_id) references tweets(id) diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go index a961623..2750442 100644 --- a/persistence/tweet_queries.go +++ b/persistence/tweet_queries.go @@ -39,13 +39,13 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { } } for _, image := range t.Images { - _, err := db.Exec("insert into images (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, image) + _, err := db.Exec("insert into images (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, image.Filename) if err != nil { return err } } for _, video := range t.Videos { - _, err := db.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, video) + _, err := db.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, video.Filename) if err != nil { return err } @@ -81,7 +81,7 @@ func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool { func (p Profile) attach_images(t *scraper.Tweet) error { println("Attaching images") - stmt, err := p.DB.Prepare("select filename from images where tweet_id = ?") + stmt, err := p.DB.Prepare("select filename, is_downloaded from images where tweet_id = ?") if err != nil { return err } @@ -90,14 +90,15 @@ func (p Profile) attach_images(t *scraper.Tweet) error { if err != nil { return err } - var img string + var filename string + var is_downloaded bool for rows.Next() { - err = rows.Scan(&img) + err = rows.Scan(&filename, &is_downloaded) if err != nil { return err } - println(img) - t.Images = append(t.Images, img) + new_img := scraper.Image{TweetID: t.ID, Filename: filename, IsDownloaded: is_downloaded} + t.Images = append(t.Images, new_img) fmt.Printf("%v\n", t.Images) } return nil @@ -105,7 +106,7 @@ func (p Profile) attach_images(t *scraper.Tweet) error { func (p Profile) attach_videos(t *scraper.Tweet) error { println("Attaching videos") - stmt, err := p.DB.Prepare("select filename from videos where tweet_id = ?") + stmt, err := p.DB.Prepare("select filename, is_downloaded from videos where tweet_id = ?") if err != nil { return err } @@ -114,14 +115,15 @@ func (p Profile) attach_videos(t *scraper.Tweet) error { if err != nil { return err } - var video string + var filename string + var is_downloaded bool for rows.Next() { - err = rows.Scan(&video) + err = rows.Scan(&filename, &is_downloaded) if err != nil { return err } - println(video) - t.Videos = append(t.Videos, video) + new_video := scraper.Video{TweetID: t.ID, Filename: filename, IsDownloaded: is_downloaded} + t.Videos = append(t.Videos, new_video) fmt.Printf("%v\n", t.Videos) } return nil @@ -144,7 +146,6 @@ func (p Profile) attach_urls(t *scraper.Tweet) error { if err != nil { return err } - println(url) t.Urls = append(t.Urls, url) fmt.Printf("%v\n", t.Urls) } @@ -199,6 +200,9 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { } +/** + * Populate the `User` field on a tweet with an actual User + */ func (p Profile) LoadUserFor(t *scraper.Tweet) error { if t.User != nil { // Already there, no need to load it diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 2163614..c97c34f 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -58,10 +58,10 @@ func create_dummy_user() scraper.User { */ func create_dummy_tweet() scraper.Tweet { rand.Seed(time.Now().UnixNano()) - tweet_id := fmt.Sprint(rand.Int()) + tweet_id := scraper.TweetID(fmt.Sprint(rand.Int())) return scraper.Tweet{ - ID: scraper.TweetID(tweet_id), + ID: tweet_id, UserID: "user", Text: "text", PostedAt: time.Now().Truncate(1e9), // Round to nearest second @@ -69,9 +69,12 @@ func create_dummy_tweet() scraper.Tweet { NumRetweets: 2, NumReplies: 3, NumQuoteTweets: 4, - Videos: []string{"video"}, + Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video", IsDownloaded: false}}, Urls: []string{"url1", "url2"}, - Images: []string{"image1", "image2"}, + Images: []scraper.Image{ + scraper.Image{TweetID: tweet_id, Filename: "image1", IsDownloaded: false}, + scraper.Image{TweetID: tweet_id, Filename: "image2", IsDownloaded: false}, + }, Mentions: []scraper.UserHandle{"mention1", "mention2"}, Hashtags: []string{"hash1", "hash2"}, } diff --git a/scraper/image.go b/scraper/image.go new file mode 100644 index 0000000..71fd963 --- /dev/null +++ b/scraper/image.go @@ -0,0 +1,7 @@ +package scraper + +type Image struct { + TweetID TweetID + Filename string + IsDownloaded bool +} diff --git a/scraper/tweet.go b/scraper/tweet.go index b0a1d1d..29801a1 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -25,8 +25,8 @@ type Tweet struct { InReplyTo TweetID Urls []string - Images []string - Videos []string + Images []Image + Videos []Video Mentions []UserHandle Hashtags []string QuotedTweet TweetID @@ -96,7 +96,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { panic_str := fmt.Sprintf("Unknown media type: %q", media.Type) panic(panic_str) } - ret.Images = append(ret.Images, media.MediaURLHttps) + new_image := Image{TweetID: ret.ID, Filename: media.MediaURLHttps, IsDownloaded: false} + ret.Images = append(ret.Images, new_image) } for _, hashtag := range apiTweet.Entities.Hashtags { ret.Hashtags = append(ret.Hashtags, hashtag.Text) @@ -116,8 +117,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { } variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants sort.Sort(variants) - ret.Videos = []string{variants[0].URL} - ret.Images = []string{} + ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}} + ret.Images = []Image{} } return } diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go index 63e64ea..e306ad8 100644 --- a/scraper/tweet_test.go +++ b/scraper/tweet_test.go @@ -153,7 +153,7 @@ func TestParseTweetWithVideo(t *testing.T) { t.Errorf(err.Error()) } expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12" - if len(tweet.Videos) != 1 || tweet.Videos[0] != expected_video { + if len(tweet.Videos) != 1 || tweet.Videos[0].Filename != expected_video { t.Errorf("Expected video %q, but got %+v", expected_video, tweet.Videos) } diff --git a/scraper/video.go b/scraper/video.go new file mode 100644 index 0000000..a847efd --- /dev/null +++ b/scraper/video.go @@ -0,0 +1,7 @@ +package scraper + +type Video struct { + TweetID TweetID + Filename string + IsDownloaded bool +}