diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 50b35d3..109a2db 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -50,12 +50,12 @@ func (p Profile) SaveVideo(vid scraper.Video) error { */ func (p Profile) SaveUrl(url scraper.Url) error { _, err := p.DB.Exec(` - insert into urls (tweet_id, domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) - values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set is_content_downloaded=(is_content_downloaded or ?) `, - url.TweetID, url.Domain, url.Text, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, + url.TweetID, url.Domain, url.Text, url.ShortText, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, url.IsContentDownloaded, ) return err @@ -138,7 +138,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e * Get the list of Urls for a Tweet */ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) { - stmt, err := p.DB.Prepare("select domain, text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid") + stmt, err := p.DB.Prepare("select domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id=? order by rowid") if err != nil { return } @@ -149,7 +149,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error } var url scraper.Url for rows.Next() { - err = rows.Scan(&url.Domain, &url.Text, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded) + err = rows.Scan(&url.Domain, &url.Text, &url.ShortText, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, &url.HasThumbnail, &url.IsContentDownloaded) if err != nil { return } diff --git a/persistence/schema.sql b/persistence/schema.sql index 3c6b5e9..7045ebb 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -70,6 +70,7 @@ create table urls (rowid integer primary key, tweet_id integer not null, domain text, text text not null, + short_text text not null default "", title text, description text, creator_id integer, diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 36d9af2..fcf1c4f 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -110,6 +110,7 @@ func create_url_from_id(id int) scraper.Url { TweetID: -1, Domain: s + "domain", Text: s + "text", + ShortText: s + "shorttext", Title: s + "title", Description: s + "description", ThumbnailWidth: id * 23, diff --git a/persistence/versions.go b/persistence/versions.go index 808130f..f105f30 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -8,7 +8,7 @@ import ( ) -const ENGINE_DATABASE_VERSION = 6 +const ENGINE_DATABASE_VERSION = 7 type VersionMismatchError struct { @@ -59,6 +59,7 @@ var MIGRATIONS = []string{ `alter table videos add column duration integer not null default 0; alter table videos add column view_count integer not null default 0`, `alter table users add column is_banned boolean default 0`, +`alter table urls add column short_text text not null default ""`, } /** diff --git a/scraper/tweet.go b/scraper/tweet.go index e7259ad..b4c7a59 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -108,6 +108,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { url_object = ParseAPIUrlCard(apiTweet.Card) } url_object.Text = url.ExpandedURL + url_object.ShortText = url.ShortenedUrl url_object.TweetID = ret.ID ret.Urls = append(ret.Urls, url_object) } diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go index 98bff58..621abd8 100644 --- a/scraper/tweet_test.go +++ b/scraper/tweet_test.go @@ -90,6 +90,7 @@ func TestParseTweetWithUrl(t *testing.T) { u := tweet.Urls[0] assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text) + assert.Equal("https://t.co/Y1lWjNEiPK", u.ShortText) assert.True(u.HasCard) assert.Equal("reason.com", u.Domain) } @@ -101,6 +102,7 @@ func TestParseTweetWithUrlButNoCard(t *testing.T) { u := tweet.Urls[0] assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text) + assert.Equal("https://t.co/ZigZyLctwt", u.ShortText) assert.False(u.HasCard) } diff --git a/scraper/url.go b/scraper/url.go index 3a8eb3b..b812cfe 100644 --- a/scraper/url.go +++ b/scraper/url.go @@ -11,6 +11,7 @@ type Url struct { Domain string Text string + ShortText string Title string Description string ThumbnailWidth int diff --git a/scraper/user_test.go b/scraper/user_test.go index 623f146..9abb651 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -118,7 +118,7 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) { return &http.Response{StatusCode: 301, Header: header}, nil }) - // Check the httmock interceptor is working correctly + // Check the httpmock interceptor is working correctly require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request") result, err := ParseHandleFromTweetUrl(short_url)