From 0868d8d6d80f3ff05463cb25ac252e6bcf7fab9c Mon Sep 17 00:00:00 2001 From: Alessio Date: Thu, 8 Jun 2023 18:51:50 -0300 Subject: [PATCH] Implement expandable ("Show more...") tweets --- persistence/schema.sql | 1 + persistence/tweet_queries.go | 11 +++++++++-- persistence/tweet_queries_test.go | 33 +++++++++++++++++++++++++++++++ persistence/versions.go | 3 ++- scraper/api_types.go | 1 + scraper/api_types_v2.go | 17 ++++++++++++++++ scraper/api_types_v2_test.go | 17 ++++++++++++++++ scraper/tweet.go | 1 + 8 files changed, 81 insertions(+), 3 deletions(-) diff --git a/persistence/schema.sql b/persistence/schema.sql index 2debee4..148857c 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -41,6 +41,7 @@ create table tweets (rowid integer primary key, id integer unique not null check(typeof(id) = 'integer'), user_id integer not null check(typeof(user_id) = 'integer'), text text not null, + is_expandable bool not null default 0, posted_at integer, num_likes integer, num_retweets integer, diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go index 6322885..f09625e 100644 --- a/persistence/tweet_queries.go +++ b/persistence/tweet_queries.go @@ -23,16 +23,21 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { _, err := db.NamedExec(` insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, - quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_stub, is_content_downloaded, + quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_expandable, + is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at) values (:id, :user_id, :text, :posted_at, :num_likes, :num_retweets, :num_replies, :num_quote_tweets, :in_reply_to_id, :quoted_tweet_id, :mentions, :reply_mentions, :hashtags, nullif(:space_id, ''), - (select rowid from tombstone_types where short_name=:tombstone_type), :is_stub, :is_content_downloaded, + (select rowid from tombstone_types where short_name=:tombstone_type), + :is_expandable, + :is_stub, :is_content_downloaded, :is_conversation_scraped, :last_scraped_at) on conflict do update set text=(case when is_stub then :text + when not is_expandable and :is_expandable then + :text else text end @@ -49,6 +54,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { (select rowid from tombstone_types where short_name=:tombstone_type) end ), + is_expandable=is_expandable or :is_expandable, is_content_downloaded=(is_content_downloaded or :is_content_downloaded), is_conversation_scraped=(is_conversation_scraped or :is_conversation_scraped), last_scraped_at=max(last_scraped_at, :last_scraped_at) @@ -119,6 +125,7 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { err := db.Get(&t, ` select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type, + is_expandable, is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid where id = ? diff --git a/persistence/tweet_queries_test.go b/persistence/tweet_queries_test.go index df537ca..5da2925 100644 --- a/persistence/tweet_queries_test.go +++ b/persistence/tweet_queries_test.go @@ -75,6 +75,7 @@ func TestNoWorseningTweet(t *testing.T) { tweet.IsContentDownloaded = true tweet.IsStub = false tweet.IsConversationScraped = true + tweet.IsExpandable = true tweet.LastScrapedAt = scraper.TimestampFromUnix(1000) tweet.Text = "Yes text" tweet.NumLikes = 10 @@ -90,6 +91,7 @@ func TestNoWorseningTweet(t *testing.T) { tweet.IsContentDownloaded = false tweet.IsStub = true tweet.IsConversationScraped = false + tweet.IsExpandable = false tweet.LastScrapedAt = scraper.TimestampFromUnix(500) tweet.Text = "" err = profile.SaveTweet(tweet) @@ -106,6 +108,7 @@ func TestNoWorseningTweet(t *testing.T) { assert.False(new_tweet.IsStub, "Should have preserved non-stub status") assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status") assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") + assert.True(new_tweet.IsExpandable) assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable") assert.Equal(10, new_tweet.NumLikes) @@ -149,6 +152,36 @@ func TestUntombstoningTweet(t *testing.T) { assert.Equal(new_tweet.Text, "Some text", "Should have created the text") } +// The tweet is an expanding tweet, but was saved before expanding tweets were implemented +func TestUpgradingExpandingTweet(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) + + tweet := create_dummy_tweet() + tweet.IsExpandable = false + tweet.Text = "Some long but cut-off text..." + + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(err) + + // Now that we have expanding tweets + tweet.IsExpandable = true + tweet.Text = "Some long but cut-off text, but now it no longer is cut off!" + err = profile.SaveTweet(tweet) + require.NoError(err) + + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(err) + + assert.True(new_tweet.IsExpandable, "Should now be is_expanding after re-scrape") + assert.Equal(new_tweet.Text, "Some long but cut-off text, but now it no longer is cut off!", "Should have extended the text") +} + /** * The "unavailable" tombstone type is not reliable, you should be able to update away from it but * not toward it diff --git a/persistence/versions.go b/persistence/versions.go index 816fd89..3b86154 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -8,7 +8,7 @@ import ( "offline_twitter/terminal_utils" ) -const ENGINE_DATABASE_VERSION = 16 +const ENGINE_DATABASE_VERSION = 17 type VersionMismatchError struct { EngineVersion int @@ -97,6 +97,7 @@ var MIGRATIONS = []string{ foreign key(space_id) references spaces(id) );`, `create index if not exists index_tweets_user_id on tweets (user_id);`, + `alter table tweets add column is_expandable bool not null default 0;`, } /** diff --git a/scraper/api_types.go b/scraper/api_types.go index 0ac37e2..35702f8 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -176,6 +176,7 @@ type APITweet struct { UserHandle string Card APICard `json:"card"` TombstoneText string + IsExpandable bool } func (t *APITweet) NormalizeContent() { diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index 8a0d6e4..0cfb134 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -151,6 +151,15 @@ type _Result struct { Core *APIV2UserResult `json:"core"` Card APIV2Card `json:"card"` QuotedStatusResult *APIV2Result `json:"quoted_status_result"` + NoteTweet struct { + IsExpandable bool `json:"is_expandable"` + NoteTweetResults struct { + Result struct { + ID string `json:"id"` + Text string `json:"text"` + } `json:"result"` + } `json:"note_tweet_results"` + } `json:"note_tweet"` } type APIV2Result struct { @@ -177,6 +186,14 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove api_result.Result._Result = api_result.Result.Tweet } + // Handle expandable tweets + if api_result.Result.NoteTweet.IsExpandable { + api_result.Result.Legacy.FullText = api_result.Result.NoteTweet.NoteTweetResults.Result.Text + api_result.Result.Legacy.DisplayTextRange = []int{} // Override the "display text" + api_result.Result.Legacy.IsExpandable = true + } + + // Process the tweet itself main_tweet_trove := api_result.Result.Legacy.ToTweetTrove() ret.MergeWith(main_tweet_trove) diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index 0fc2f94..71a7181 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -637,6 +637,23 @@ func TestRetweetWithVisibilityResults(t *testing.T) { assert.Equal(rt.TweetID, TweetID(1595973736833892356)) } +func TestExpandableTweet(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + data, err := os.ReadFile("test_responses/api_v2/expandable_tweet.json") + require.NoError(err) + var tweet_result APIV2Result + err = json.Unmarshal(data, &tweet_result) + require.NoError(err) + + trove := tweet_result.ToTweetTrove(true) + main_tweet, is_ok := trove.Tweets[TweetID(1649600354747572225)] + require.True(is_ok) + + assert.True(main_tweet.IsExpandable) + assert.Equal(main_tweet.Text, "This entire millenial media era has come and gone. Where are the lindy articles from all these websites? The ideas? \n\nIt was just a decade and a half of nothing. \n\na complete waste of time. \n\nAnd it ends with the blue checks being stripped. \n\nA fitting ending to a time not worth saving") //nolint:lll // It's a string +} + // In a user feed, an "entry" can contain multiple tweets when making authenticated requests. // They should parse out as all the tweets. func TestEntryWithConversationThread(t *testing.T) { diff --git a/scraper/tweet.go b/scraper/tweet.go index 4eadfa6..7548806 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -114,6 +114,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { ret.UserID = UserID(apiTweet.UserID) ret.UserHandle = UserHandle(apiTweet.UserHandle) ret.Text = apiTweet.FullText + ret.IsExpandable = apiTweet.IsExpandable // Process "posted-at" date and time if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones