Add persistence for new Url type
parent 79f098450e
commit 05c3f2289b

cmd/tests.sh | 16 ++++++++++++++++
@@ -114,4 +114,20 @@ tw fetch_user HbdNrx
 test $(sqlite3 twitter.db "select is_private from users where handle = 'HbdNrx'") = "1"
 
+# Test tweets with URLs
+urls_count=$(sqlite3 twitter.db "select count(*) from urls")
+tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433
+urls_count_after=$(sqlite3 twitter.db "select count(*) from urls")
+test $urls_count_after = $(($urls_count + 1))
+test "$(sqlite3 twitter.db "select title from urls where tweet_id = 1428904664645394433")" = "Justice Department investigating Elon Musk's SpaceX following complaint of hiring discrimination"
+test $(sqlite3 twitter.db "select thumbnail_remote_url from urls where tweet_id = 1428904664645394433") = "https://pbs.twimg.com/card_img/1436430370946392064/WX1Rv2AJ?format=jpg&name=800x320_1"
+
+# Try to double-fetch it; shouldn't duplicate the URL
+tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433
+urls_count_after_2x=$(sqlite3 twitter.db "select count(*) from urls")
+test $urls_count_after_2x = $urls_count_after
+
+
+# TODO: Maybe this file should be broken up into multiple test scripts
+
 echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"
@@ -42,6 +42,22 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
 	return err
 }
 
+/**
+ * Save an Url
+ */
+func (p Profile) SaveUrl(url scraper.Url) error {
+	_, err := p.DB.Exec(`
+		insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, is_content_downloaded)
+		            values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+		       on conflict do update
+		       set is_content_downloaded=?
+		`,
+		url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.IsContentDownloaded,
+		url.IsContentDownloaded,
+	)
+	return err
+}
+
 /**
  * Get the list of images for a tweet
  */
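A note on the SaveUrl hunk above: the insert lists eleven placeholders but twelve values are bound, because the trailing `set is_content_downloaded=?` consumes one more. The bare `on conflict` clause resolves against the `unique (tweet_id, text)` index added in the schema hunk further down, which is what makes re-fetching a tweet idempotent. Below is a minimal standalone sketch of this pattern, assuming the github.com/mattn/go-sqlite3 driver and SQLite >= 3.24 (upsert support); the commit itself doesn't show which driver the project uses.

    package main

    import (
        "database/sql"
        "fmt"

        _ "github.com/mattn/go-sqlite3"
    )

    func main() {
        db, err := sql.Open("sqlite3", ":memory:")
        if err != nil {
            panic(err)
        }
        defer db.Close()

        // Trimmed-down urls table; the unique index is what the bare
        // "on conflict" clause resolves against.
        _, err = db.Exec(`create table urls (tweet_id integer not null, text text not null,
            is_content_downloaded boolean default 0, unique (tweet_id, text))`)
        if err != nil {
            panic(err)
        }

        save := func(downloaded bool) {
            // "set ...=?" takes one more placeholder than the values list,
            // so the flag is bound twice -- the same shape as SaveUrl.
            _, err := db.Exec(`insert into urls (tweet_id, text, is_content_downloaded)
                values (?, ?, ?)
                on conflict do update set is_content_downloaded=?`,
                12345, "https://example.com", downloaded, downloaded)
            if err != nil {
                panic(err)
            }
        }

        save(false)
        save(true) // same (tweet_id, text): updates in place, no duplicate row

        var count int
        var downloaded bool
        db.QueryRow("select count(*), is_content_downloaded from urls").Scan(&count, &downloaded)
        fmt.Println(count, downloaded) // 1 true
    }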
@@ -93,3 +109,28 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
 	}
 	return
 }
+
+/**
+ * Get the list of Urls for a Tweet
+ */
+func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
+	stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_remote_url, thumbnail_local_path, has_card, is_content_downloaded from urls where tweet_id=? order by rowid")
+	if err != nil {
+		return
+	}
+	defer stmt.Close()
+	rows, err := stmt.Query(t.ID)
+	if err != nil {
+		return
+	}
+	var url scraper.Url
+	for rows.Next() {
+		err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.IsContentDownloaded)
+		if err != nil {
+			return
+		}
+		url.TweetID = t.ID
+		urls = append(urls, url)
+	}
+	return
+}
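GetUrlsForTweet reuses a single `url` variable across loop iterations. That is safe in Go because `append` stores a copy of the struct value each time, so a later `rows.Scan` can't clobber rows already collected. A pure-stdlib sketch of that semantics (toy struct, not the real scraper.Url):

    package main

    import "fmt"

    type Url struct {
        Text string
    }

    func main() {
        var url Url // one variable, reused every iteration, like GetUrlsForTweet
        var urls []Url
        for _, text := range []string{"https://a.example", "https://b.example"} {
            url.Text = text          // stands in for rows.Scan(&url.Text, ...)
            urls = append(urls, url) // copies the value; the slice owns its own Url
        }
        fmt.Println(urls) // [{https://a.example} {https://b.example}]
    }

One thing the hunk doesn't do is check rows.Err() after the loop; an iteration cut short by a driver error would be indistinguishable from a short result set.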
@@ -165,3 +165,81 @@ func TestModifyVideo(t *testing.T) {
 		t.Error(diff)
 	}
 }
+
+
+/**
+ * Create an Url, save it, reload it, and make sure it comes back the same
+ */
+func TestSaveAndLoadUrl(t *testing.T) {
+	profile_path := "test_profiles/TestMediaQueries"
+	profile := create_or_load_profile(profile_path)
+
+	tweet := create_stable_tweet()
+
+	// Create a fresh Url to test on
+	rand.Seed(time.Now().UnixNano())
+	url := create_url_from_id(rand.Int())
+	url.TweetID = tweet.ID
+
+	// Save the Url
+	err := profile.SaveUrl(url)
+	if err != nil {
+		t.Fatalf("Failed to save the url: %s", err.Error())
+	}
+
+	// Reload the Url
+	urls, err := profile.GetUrlsForTweet(tweet)
+	if err != nil {
+		t.Fatalf("Could not load urls: %s", err.Error())
+	}
+
+	var new_url scraper.Url
+	for index := range urls {
+		if urls[index].Text == url.Text {
+			new_url = urls[index]
+		}
+	}
+	if new_url.Text != url.Text {
+		t.Fatalf("Could not find url for some reason: %s, %s; %+v", new_url.Text, url.Text, urls)
+	}
+	if diff := deep.Equal(url, new_url); diff != nil {
+		t.Error(diff)
+	}
+}
+
+/**
+ * Change an Url, save the changes, reload it, and check if it comes back the same
+ */
+func TestModifyUrl(t *testing.T) {
+	profile_path := "test_profiles/TestMediaQueries"
+	profile := create_or_load_profile(profile_path)
+
+	tweet := create_stable_tweet()
+	url := tweet.Urls[0]
+
+	if url.Text != "-1text" {
+		t.Fatalf("Got the wrong url back: wanted %s, got %s!", "-1text", url.Text)
+	}
+
+	url.IsContentDownloaded = true
+
+	// Save the changes
+	err := profile.SaveUrl(url)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// Reload it
+	urls, err := profile.GetUrlsForTweet(tweet)
+	if err != nil {
+		t.Fatalf("Could not load urls: %s", err.Error())
+	}
+	new_url := urls[0]
+	if new_url.Text != "-1text" {
+		t.Fatalf("Got the wrong url back: wanted %s, got %s!", "-1text", new_url.Text)
+	}
+
+	if diff := deep.Equal(url, new_url); diff != nil {
+		t.Error(diff)
+	}
+}
@@ -54,10 +54,21 @@ create table retweets(rowid integer primary key,
 
 create table urls (rowid integer primary key,
 	tweet_id integer not null,
+	domain text,
 	text text not null,
+	title text,
+	description text,
+	creator_id integer,
+	site_id integer,
+	thumbnail_remote_url text,
+	thumbnail_local_path text,
+	has_card boolean,
+	is_content_downloaded boolean default 0,
 
 	unique (tweet_id, text)
 	foreign key(tweet_id) references tweets(id)
+	-- foreign key(creator_id) references users(id)
+	-- foreign key(site_id) references users(id)
 );
 
 create table images (rowid integer primary key,
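On the new `has_card` and `is_content_downloaded` columns: SQLite has no native boolean type, so "boolean" columns store 0/1 integers, and Go's database/sql converts bools in both directions when binding and scanning. A small sketch, again assuming the github.com/mattn/go-sqlite3 driver:

    package main

    import (
        "database/sql"
        "fmt"

        _ "github.com/mattn/go-sqlite3"
    )

    func main() {
        db, err := sql.Open("sqlite3", ":memory:")
        if err != nil {
            panic(err)
        }
        defer db.Close()

        db.Exec("create table t (has_card boolean, is_content_downloaded boolean default 0)")
        db.Exec("insert into t (has_card) values (?)", true) // the other column takes its default

        var hasCard, downloaded bool
        db.QueryRow("select has_card, is_content_downloaded from t").Scan(&hasCard, &downloaded)
        fmt.Println(hasCard, downloaded) // true false
    }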
@@ -1,7 +1,6 @@
 package persistence
 
 import (
-	"fmt"
 	"time"
 	"strings"
 	"database/sql"
@@ -34,7 +33,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 		return err
 	}
 	for _, url := range t.Urls {
-		_, err := db.Exec("insert into urls (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, url)
+		err := p.SaveUrl(url)
 		if err != nil {
 			return err
 		}
@@ -80,29 +79,6 @@ func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool {
 	return true
 }
 
-func (p Profile) attach_urls(t *scraper.Tweet) error {
-	println("Attaching urls")
-	stmt, err := p.DB.Prepare("select text from urls where tweet_id = ?")
-	if err != nil {
-		return err
-	}
-	defer stmt.Close()
-	rows, err := stmt.Query(t.ID)
-	if err != nil {
-		return err
-	}
-	var url string
-	for rows.Next() {
-		err = rows.Scan(&url)
-		if err != nil {
-			return err
-		}
-		t.Urls = append(t.Urls, url)
-		fmt.Printf("%v\n", t.Urls)
-	}
-	return nil
-}
-
 func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	db := p.DB
 
@@ -146,7 +122,9 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	}
 	t.Videos = vids
 
-	err = p.attach_urls(&t)
+	urls, err := p.GetUrlsForTweet(t)
+	t.Urls = urls
+
 	return t, err
 }
 
@@ -29,12 +29,6 @@ func TestSaveAndLoadTweet(t *testing.T) {
 		t.Fatalf("Failed to load the tweet: %s", err.Error())
 	}
 
-	if diff := deep.Equal(tweet.Images, new_tweet.Images); diff != nil {
-		t.Error(diff)
-	}
-	if diff := deep.Equal(tweet.Videos, new_tweet.Videos); diff != nil {
-		t.Error(diff)
-	}
 	if diff := deep.Equal(tweet, new_tweet); diff != nil {
 		t.Error(diff)
 	}
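The deleted per-field Images/Videos comparisons above were redundant: a whole-struct deep.Equal already reports each differing field by path in its diff output. A sketch, assuming the go-test/deep package (which matches the deep.Equal usage in these tests):

    package main

    import (
        "fmt"

        "github.com/go-test/deep"
    )

    type Tweet struct {
        Text string
        Urls []string
    }

    func main() {
        a := Tweet{Text: "hi", Urls: []string{"https://a.example"}}
        b := Tweet{Text: "hi", Urls: []string{"https://b.example"}}
        if diff := deep.Equal(a, b); diff != nil {
            fmt.Println(diff) // e.g. [Urls.slice[0]: https://a.example != https://b.example]
        }
    }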
@@ -92,6 +92,26 @@ func create_video_from_id(id int) scraper.Video {
 	}
 }
 
+/**
+ * Create a semi-stable Url based on the given ID
+ */
+func create_url_from_id(id int) scraper.Url {
+	s := fmt.Sprint(id)
+	return scraper.Url {
+		TweetID: -1,
+		Domain: s + "domain",
+		Text: s + "text",
+		Title: s + "title",
+		Description: s + "description",
+		ThumbnailRemoteUrl: s + "remote url",
+		ThumbnailLocalPath: s + "local path",
+		CreatorID: scraper.UserID(id),
+		SiteID: scraper.UserID(id),
+		HasCard: true,
+		IsContentDownloaded: false,
+	}
+}
+
 /**
  * Create a stable tweet with a fixed ID and content
 */
@@ -109,7 +129,9 @@ func create_stable_tweet() scraper.Tweet {
 		Videos: []scraper.Video{
 			create_video_from_id(-1),
 		},
-		Urls: []string{},
+		Urls: []scraper.Url{
+			create_url_from_id(-1),
+		},
 		Images: []scraper.Image{
 			create_image_from_id(-1),
 		},
@@ -173,6 +195,11 @@ func create_dummy_tweet() scraper.Tweet {
 	vid := create_video_from_id(rand.Int())
 	vid.TweetID = tweet_id
 
+	url1 := create_url_from_id(rand.Int())
+	url1.TweetID = tweet_id
+	url2 := create_url_from_id(rand.Int())
+	url2.TweetID = tweet_id
+
 	return scraper.Tweet{
 		ID: tweet_id,
 		UserID: -1,
@@ -183,7 +210,7 @@ func create_dummy_tweet() scraper.Tweet {
 		NumReplies: 3,
 		NumQuoteTweets: 4,
 		Videos: []scraper.Video{vid},
-		Urls: []string{"url1", "url2"},
+		Urls: []scraper.Url{url1, url2},
 		Images: []scraper.Image{img1, img2},
 		Mentions: []scraper.UserHandle{"mention1", "mention2"},
 		Hashtags: []string{"hash1", "hash2"},
scraper/test_responses/tweet_with_url_but_no_card.json | 1 + (new file)

@@ -0,0 +1 @@
+{"created_at":"Fri Sep 17 00:03:26 +0000 2021","id_str":"1438654793384353793","full_text":"NEW: columnist Jennifer Rubin was one of the Obama administration’s most reactionary critics\n\nNow she’s the Biden admin’s favorite columnist\nW/ @NickNiedz\n\nWe reached out to Rubin her columns and divisions at Wapo over them\n\nHer response—>\nhttps://t.co/ZigZyLctwt https://t.co/KZZAK1tXhq","display_text_range":[0,266],"entities":{"user_mentions":[{"screen_name":"NickNiedz","name":"Nick Niedzwiadek","id_str":"548501303","indices":[144,154]}],"urls":[{"url":"https://t.co/ZigZyLctwt","expanded_url":"https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364","display_url":"politico.com/newsletters/we…","indices":[243,266]}],"media":[{"id_str":"1438654789596942336","indices":[267,290],"media_url":"http://pbs.twimg.com/media/E_cg6KhXEAAyPjY.jpg","media_url_https":"https://pbs.twimg.com/media/E_cg6KhXEAAyPjY.jpg","url":"https://t.co/KZZAK1tXhq","display_url":"pic.twitter.com/KZZAK1tXhq","expanded_url":"https://twitter.com/AlexThomp/status/1438654793384353793/photo/1","type":"photo","original_info":{"width":1170,"height":1809,"focus_rects":[{"x":0,"y":0,"h":655,"w":1170},{"x":0,"y":0,"h":1170,"w":1170},{"x":0,"y":0,"h":1334,"w":1170},{"x":45,"y":0,"h":1809,"w":905},{"x":0,"y":0,"h":1809,"w":1170}]},"sizes":{"small":{"w":440,"h":680,"resize":"fit"},"medium":{"w":776,"h":1200,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1170,"h":1809,"resize":"fit"}}}]},"extended_entities":{"media":[{"id_str":"1438654789596942336","indices":[267,290],"media_url":"http://pbs.twimg.com/media/E_cg6KhXEAAyPjY.jpg","media_url_https":"https://pbs.twimg.com/media/E_cg6KhXEAAyPjY.jpg","url":"https://t.co/KZZAK1tXhq","display_url":"pic.twitter.com/KZZAK1tXhq","expanded_url":"https://twitter.com/AlexThomp/status/1438654793384353793/photo/1","type":"photo","original_info":{"width":1170,"height":1809,"focus_rects":[{"x":0,"y":0,"h":655,"w":1170},{"x":0,"y":0,"h":1170,"w":1170},{"x":0,"y":0,"h":1334,"w":1170},{"x":45,"y":0,"h":1809,"w":905},{"x":0,"y":0,"h":1809,"w":1170}]},"sizes":{"small":{"w":440,"h":680,"resize":"fit"},"medium":{"w":776,"h":1200,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1170,"h":1809,"resize":"fit"}},"media_key":"3_1438654789596942336","ext_alt_text":null,"ext_media_availability":{"status":"available"},"ext_media_color":{"palette":[{"rgb":{"red":252,"green":252,"blue":252},"percentage":99.67},{"rgb":{"red":145,"green":145,"blue":145},"percentage":0.33}]},"ext":{"mediaStats":{"r":"Missing","ttl":-1}}}]},"source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","user_id_str":"370982639","retweet_count":407,"favorite_count":1802,"reply_count":2642,"quote_count":884,"conversation_id_str":"1438654793384353793","possibly_sensitive_editable":true,"lang":"en","self_thread":{"id_str":"1438654793384353793"}}
scraper/test_responses/tweet_with_url_card.json | 1 + (new file)

File diff suppressed because one or more lines are too long
@@ -23,7 +23,7 @@ type Tweet struct {
 	NumQuoteTweets int
 	InReplyTo      TweetID
 
-	Urls        []string
+	Urls        []Url
 	Images      []Image
 	Videos      []Video
 	Mentions    []UserHandle
@@ -63,7 +63,7 @@ Replies: %d RT: %d QT: %d Likes: %d
 	if len(t.Urls) > 0 {
 		ret += "urls: [\n"
 		for _, url := range(t.Urls) {
-			ret += "  " + url + "\n"
+			ret += "  " + url.Text + "\n"
 		}
 		ret += "]"
 	}
@@ -89,8 +89,18 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 	ret.NumQuoteTweets = apiTweet.QuoteCount
 	ret.InReplyTo = TweetID(apiTweet.InReplyToStatusID)
 
-	for _, url := range apiTweet.Entities.URLs {
-		ret.Urls = append(ret.Urls, url.ExpandedURL)
+	for i, url := range apiTweet.Entities.URLs {
+		if i != 0 {
+			panic(fmt.Sprintf("Tweet with multiple embedded URLs: %d", apiTweet.ID))
+		}
+		var url_object Url
+		if apiTweet.Card.BindingValues.Domain.Value != "" {
+			// Using the "Domain" field to detect if there is a card
+			url_object = ParseAPIUrlCard(apiTweet.Card)
+		}
+		url_object.Text = url.ExpandedURL
+		url_object.TweetID = ret.ID
+		ret.Urls = append(ret.Urls, url_object)
 	}
 	for _, media := range apiTweet.Entities.Media {
 		if media.Type != "photo" {  // TODO: remove this eventually
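The parser hunk above detects a card by checking whether the card's Domain binding is non-empty, since the API payload carries no explicit "has card" flag; ParseAPIUrlCard then fills in the preview fields, and the zero-value Url (HasCard false) is used otherwise. A toy sketch of that control flow — the types below only mirror the shape of the data, they are not the real scraper definitions:

    package main

    import "fmt"

    type BindingValue struct{ Value string }

    type Card struct {
        BindingValues struct {
            Domain BindingValue
        }
    }

    type Url struct {
        Text    string
        Domain  string
        HasCard bool
    }

    func parseUrl(expandedUrl string, card Card) Url {
        var url Url
        if card.BindingValues.Domain.Value != "" {
            // Card detected: fill in the preview fields.
            url.Domain = card.BindingValues.Domain.Value
            url.HasCard = true
        }
        url.Text = expandedUrl
        return url
    }

    func main() {
        var noCard Card
        fmt.Println(parseUrl("https://example.com/a", noCard)) // HasCard: false

        withCard := Card{}
        withCard.BindingValues.Domain.Value = "example.com"
        fmt.Println(parseUrl("https://example.com/a", withCard)) // HasCard: true
    }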
@@ -52,6 +52,10 @@ func TestParseSingleTweet(t *testing.T) {
 		t.Errorf("Expected %v, got %v", []string{"michaelmalice"}, tweet.Mentions)
 	}
 
+	if len(tweet.Urls) != 0 {
+		t.Errorf("Expected %d urls, but got %d", 0, len(tweet.Urls))
+	}
+
 	if tweet.PostedAt.Unix() != 1621639105 {
 		t.Errorf("Expected %d, got %d", 1621639105, tweet.PostedAt.Unix())
 	}
@@ -162,6 +166,66 @@ func TestParseTweetWithVideo(t *testing.T) {
 	}
 }
+
+func TestParseTweetWithUrl(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tweet_with_url_card.json")
+	if err != nil {
+		panic(err)
+	}
+	var apitweet scraper.APITweet
+	err = json.Unmarshal(data, &apitweet)
+	if err != nil {
+		t.Errorf(err.Error())
+	}
+	tweet, err := scraper.ParseSingleTweet(apitweet)
+	if err != nil {
+		t.Errorf(err.Error())
+	}
+
+	if len(tweet.Urls) != 1 {
+		t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls))
+	}
+
+	expected_url_text := "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/"
+	if tweet.Urls[0].Text != expected_url_text {
+		t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
+	}
+	if !tweet.Urls[0].HasCard {
+		t.Errorf("Expected it to have a card, but it doesn't")
+	}
+	expected_url_domain := "reason.com"
+	if tweet.Urls[0].Domain != expected_url_domain {
+		t.Errorf("Expected Url domain to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain)
+	}
+}
+
+func TestParseTweetWithUrlButNoCard(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tweet_with_url_but_no_card.json")
+	if err != nil {
+		panic(err)
+	}
+	var apitweet scraper.APITweet
+	err = json.Unmarshal(data, &apitweet)
+	if err != nil {
+		t.Errorf(err.Error())
+	}
+	tweet, err := scraper.ParseSingleTweet(apitweet)
+	if err != nil {
+		t.Errorf(err.Error())
+	}
+
+	if len(tweet.Urls) != 1 {
+		t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls))
+	}
+
+	expected_url_text := "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364"
+	if tweet.Urls[0].Text != expected_url_text {
+		t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
+	}
+	if tweet.Urls[0].HasCard {
+		t.Errorf("Expected url not to have a card, but it thinks it has one")
+	}
+}
+
 func TestParseTweetResponse(t *testing.T) {
 	data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")
 	if err != nil {
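One caveat on the two new parser tests: the length check uses t.Errorf, which lets the test keep running, so if parsing ever returns zero URLs the `tweet.Urls[0]` accesses below it panic with an index-out-of-range rather than failing cleanly. A fail-fast variant would use t.Fatalf; a sketch with a hypothetical helper (not part of this commit), shown with plain strings to stay self-contained:

    package scraper_test

    import "testing"

    // checkSingleUrl stops the test on a bad length, so the caller can
    // safely index urls[0] afterwards.
    func checkSingleUrl(t *testing.T, urls []string) string {
        if len(urls) != 1 {
            t.Fatalf("Expected %d urls, but got %d", 1, len(urls))
        }
        return urls[0]
    }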
@@ -7,6 +7,8 @@ import (
 )
 
 type Url struct {
+	TweetID            TweetID
+
 	Domain             string
 	Text               string
 	Title              string