Add video and image types
- In addition to `Filename` (string), each of the new types includes `IsDownloaded` (bool)
This commit is contained in:
parent
0c87428c44
commit
81392996bb
@ -61,7 +61,7 @@ create table urls (rowid integer primary key,
|
|||||||
create table images (rowid integer primary key,
|
create table images (rowid integer primary key,
|
||||||
tweet_id integer not null,
|
tweet_id integer not null,
|
||||||
filename text not null,
|
filename text not null,
|
||||||
is_downloaded,
|
is_downloaded boolean default 0,
|
||||||
|
|
||||||
unique (tweet_id, filename)
|
unique (tweet_id, filename)
|
||||||
foreign key(tweet_id) references tweets(id)
|
foreign key(tweet_id) references tweets(id)
|
||||||
@ -70,7 +70,7 @@ create table images (rowid integer primary key,
|
|||||||
create table videos (rowid integer primary key,
|
create table videos (rowid integer primary key,
|
||||||
tweet_id integer not null,
|
tweet_id integer not null,
|
||||||
filename text not null,
|
filename text not null,
|
||||||
is_downloaded,
|
is_downloaded boolean default 0,
|
||||||
|
|
||||||
unique (tweet_id, filename)
|
unique (tweet_id, filename)
|
||||||
foreign key(tweet_id) references tweets(id)
|
foreign key(tweet_id) references tweets(id)
|
||||||
|
@ -39,13 +39,13 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, image := range t.Images {
|
for _, image := range t.Images {
|
||||||
_, err := db.Exec("insert into images (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, image)
|
_, err := db.Exec("insert into images (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, image.Filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, video := range t.Videos {
|
for _, video := range t.Videos {
|
||||||
_, err := db.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, video)
|
_, err := db.Exec("insert into videos (tweet_id, filename) values (?, ?) on conflict do nothing", t.ID, video.Filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -81,7 +81,7 @@ func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool {
|
|||||||
|
|
||||||
func (p Profile) attach_images(t *scraper.Tweet) error {
|
func (p Profile) attach_images(t *scraper.Tweet) error {
|
||||||
println("Attaching images")
|
println("Attaching images")
|
||||||
stmt, err := p.DB.Prepare("select filename from images where tweet_id = ?")
|
stmt, err := p.DB.Prepare("select filename, is_downloaded from images where tweet_id = ?")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -90,14 +90,15 @@ func (p Profile) attach_images(t *scraper.Tweet) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
var img string
|
var filename string
|
||||||
|
var is_downloaded bool
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
err = rows.Scan(&img)
|
err = rows.Scan(&filename, &is_downloaded)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
println(img)
|
new_img := scraper.Image{TweetID: t.ID, Filename: filename, IsDownloaded: is_downloaded}
|
||||||
t.Images = append(t.Images, img)
|
t.Images = append(t.Images, new_img)
|
||||||
fmt.Printf("%v\n", t.Images)
|
fmt.Printf("%v\n", t.Images)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -105,7 +106,7 @@ func (p Profile) attach_images(t *scraper.Tweet) error {
|
|||||||
|
|
||||||
func (p Profile) attach_videos(t *scraper.Tweet) error {
|
func (p Profile) attach_videos(t *scraper.Tweet) error {
|
||||||
println("Attaching videos")
|
println("Attaching videos")
|
||||||
stmt, err := p.DB.Prepare("select filename from videos where tweet_id = ?")
|
stmt, err := p.DB.Prepare("select filename, is_downloaded from videos where tweet_id = ?")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -114,14 +115,15 @@ func (p Profile) attach_videos(t *scraper.Tweet) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
var video string
|
var filename string
|
||||||
|
var is_downloaded bool
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
err = rows.Scan(&video)
|
err = rows.Scan(&filename, &is_downloaded)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
println(video)
|
new_video := scraper.Video{TweetID: t.ID, Filename: filename, IsDownloaded: is_downloaded}
|
||||||
t.Videos = append(t.Videos, video)
|
t.Videos = append(t.Videos, new_video)
|
||||||
fmt.Printf("%v\n", t.Videos)
|
fmt.Printf("%v\n", t.Videos)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -144,7 +146,6 @@ func (p Profile) attach_urls(t *scraper.Tweet) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
println(url)
|
|
||||||
t.Urls = append(t.Urls, url)
|
t.Urls = append(t.Urls, url)
|
||||||
fmt.Printf("%v\n", t.Urls)
|
fmt.Printf("%v\n", t.Urls)
|
||||||
}
|
}
|
||||||
@ -199,6 +200,9 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populate the `User` field on a tweet with an actual User
|
||||||
|
*/
|
||||||
func (p Profile) LoadUserFor(t *scraper.Tweet) error {
|
func (p Profile) LoadUserFor(t *scraper.Tweet) error {
|
||||||
if t.User != nil {
|
if t.User != nil {
|
||||||
// Already there, no need to load it
|
// Already there, no need to load it
|
||||||
|
@ -58,10 +58,10 @@ func create_dummy_user() scraper.User {
|
|||||||
*/
|
*/
|
||||||
func create_dummy_tweet() scraper.Tweet {
|
func create_dummy_tweet() scraper.Tweet {
|
||||||
rand.Seed(time.Now().UnixNano())
|
rand.Seed(time.Now().UnixNano())
|
||||||
tweet_id := fmt.Sprint(rand.Int())
|
tweet_id := scraper.TweetID(fmt.Sprint(rand.Int()))
|
||||||
|
|
||||||
return scraper.Tweet{
|
return scraper.Tweet{
|
||||||
ID: scraper.TweetID(tweet_id),
|
ID: tweet_id,
|
||||||
UserID: "user",
|
UserID: "user",
|
||||||
Text: "text",
|
Text: "text",
|
||||||
PostedAt: time.Now().Truncate(1e9), // Round to nearest second
|
PostedAt: time.Now().Truncate(1e9), // Round to nearest second
|
||||||
@ -69,9 +69,12 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
NumRetweets: 2,
|
NumRetweets: 2,
|
||||||
NumReplies: 3,
|
NumReplies: 3,
|
||||||
NumQuoteTweets: 4,
|
NumQuoteTweets: 4,
|
||||||
Videos: []string{"video"},
|
Videos: []scraper.Video{scraper.Video{TweetID: tweet_id, Filename: "video", IsDownloaded: false}},
|
||||||
Urls: []string{"url1", "url2"},
|
Urls: []string{"url1", "url2"},
|
||||||
Images: []string{"image1", "image2"},
|
Images: []scraper.Image{
|
||||||
|
scraper.Image{TweetID: tweet_id, Filename: "image1", IsDownloaded: false},
|
||||||
|
scraper.Image{TweetID: tweet_id, Filename: "image2", IsDownloaded: false},
|
||||||
|
},
|
||||||
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
Mentions: []scraper.UserHandle{"mention1", "mention2"},
|
||||||
Hashtags: []string{"hash1", "hash2"},
|
Hashtags: []string{"hash1", "hash2"},
|
||||||
}
|
}
|
||||||
|
7
scraper/image.go
Normal file
7
scraper/image.go
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
type Image struct {
|
||||||
|
TweetID TweetID
|
||||||
|
Filename string
|
||||||
|
IsDownloaded bool
|
||||||
|
}
|
@ -25,8 +25,8 @@ type Tweet struct {
|
|||||||
InReplyTo TweetID
|
InReplyTo TweetID
|
||||||
|
|
||||||
Urls []string
|
Urls []string
|
||||||
Images []string
|
Images []Image
|
||||||
Videos []string
|
Videos []Video
|
||||||
Mentions []UserHandle
|
Mentions []UserHandle
|
||||||
Hashtags []string
|
Hashtags []string
|
||||||
QuotedTweet TweetID
|
QuotedTweet TweetID
|
||||||
@ -96,7 +96,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
panic_str := fmt.Sprintf("Unknown media type: %q", media.Type)
|
panic_str := fmt.Sprintf("Unknown media type: %q", media.Type)
|
||||||
panic(panic_str)
|
panic(panic_str)
|
||||||
}
|
}
|
||||||
ret.Images = append(ret.Images, media.MediaURLHttps)
|
new_image := Image{TweetID: ret.ID, Filename: media.MediaURLHttps, IsDownloaded: false}
|
||||||
|
ret.Images = append(ret.Images, new_image)
|
||||||
}
|
}
|
||||||
for _, hashtag := range apiTweet.Entities.Hashtags {
|
for _, hashtag := range apiTweet.Entities.Hashtags {
|
||||||
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
|
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
|
||||||
@ -116,8 +117,8 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
}
|
}
|
||||||
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
|
variants := apiTweet.ExtendedEntities.Media[0].VideoInfo.Variants
|
||||||
sort.Sort(variants)
|
sort.Sort(variants)
|
||||||
ret.Videos = []string{variants[0].URL}
|
ret.Videos = []Video{Video{TweetID: ret.ID, Filename: variants[0].URL}}
|
||||||
ret.Images = []string{}
|
ret.Images = []Image{}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -153,7 +153,7 @@ func TestParseTweetWithVideo(t *testing.T) {
|
|||||||
t.Errorf(err.Error())
|
t.Errorf(err.Error())
|
||||||
}
|
}
|
||||||
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
|
||||||
if len(tweet.Videos) != 1 || tweet.Videos[0] != expected_video {
|
if len(tweet.Videos) != 1 || tweet.Videos[0].Filename != expected_video {
|
||||||
t.Errorf("Expected video %q, but got %+v", expected_video, tweet.Videos)
|
t.Errorf("Expected video %q, but got %+v", expected_video, tweet.Videos)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
7
scraper/video.go
Normal file
7
scraper/video.go
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
type Video struct {
|
||||||
|
TweetID TweetID
|
||||||
|
Filename string
|
||||||
|
IsDownloaded bool
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user