From 3b4b3fceb97f0e2e45442761956f07304ce84b92 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sat, 7 May 2022 16:47:46 -0700 Subject: [PATCH] Improve tombstone handling significantly - Tombstone type can now be updated - If a tombstoned tweet becomes available, now updates the text and removes the tombstone - Non-stub tweets can now have tombstones attached to them without clobbering the tweet content --- cmd/tests.sh | 7 +++ persistence/tweet_queries.go | 20 ++++++-- persistence/tweet_queries_test.go | 83 +++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/cmd/tests.sh b/cmd/tests.sh index 9c98c6a..86a0963 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -203,6 +203,13 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count # # test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned # test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1 +# Test updating a tombstone (e.g., the QT-ing user is blocked but acct is not priv) +tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085 +test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "4|" + +tw fetch_tweet_only 1479540319410696192 # Should remove the tombstone type and update the text +test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person." 
+ # Test a tweet thread with a deleted account; should generate a user with a fake ID tw fetch_tweet https://twitter.com/CovfefeAnon/status/1365278017233313795 diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go index 31bfebf..c199ad9 100644 --- a/persistence/tweet_queries.go +++ b/persistence/tweet_queries.go @@ -20,11 +20,25 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { is_conversation_scraped, last_scraped_at) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?) on conflict do update - set num_likes=?, + set text=(case + when is_stub then + ? + else + text + end + ), + num_likes=?, num_retweets=?, num_replies=?, num_quote_tweets=?, is_stub=(is_stub and ?), + tombstone_type=(case + when ?='unavailable' and tombstone_type not in (0, 4) then + tombstone_type + else + (select rowid from tombstone_types where short_name=?) + end + ), is_content_downloaded=(is_content_downloaded or ?), is_conversation_scraped=(is_conversation_scraped or ?), last_scraped_at=max(last_scraped_at, ?) 
@@ -33,8 +47,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, - t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, - t.LastScrapedAt, + t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType, + t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, ) if err != nil { diff --git a/persistence/tweet_queries_test.go b/persistence/tweet_queries_test.go index 114af09..dc91eac 100644 --- a/persistence/tweet_queries_test.go +++ b/persistence/tweet_queries_test.go @@ -76,6 +76,7 @@ func TestNoWorseningTweet(t *testing.T) { tweet.IsStub = false tweet.IsConversationScraped = true tweet.LastScrapedAt = scraper.TimestampFromUnix(1000) + tweet.Text = "Yes text" // Save the tweet err := profile.SaveTweet(tweet) @@ -86,6 +87,7 @@ func TestNoWorseningTweet(t *testing.T) { tweet.IsStub = true tweet.IsConversationScraped = false tweet.LastScrapedAt = scraper.TimestampFromUnix(500) + tweet.Text = "" err = profile.SaveTweet(tweet) require.NoError(err) @@ -97,6 +99,85 @@ func TestNoWorseningTweet(t *testing.T) { assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status") assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") + assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable") +} + +/** + * The tweet was a tombstone and is now available; it should be updated + */ +func TestUntombstoningTweet(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + + profile_path := 
"test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) + + tweet := create_dummy_tweet() + tweet.TombstoneType = "hidden" // e.g., account was priv + tweet.IsStub = true + tweet.Text = "" + + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(err) + + // Tweet suddenly becomes available + tweet.TombstoneType = "" + tweet.IsStub = false + tweet.Text = "Some text" + err = profile.SaveTweet(tweet) + require.NoError(err) + + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(err) + + assert.False(new_tweet.IsStub, "Should no longer be a stub after re-scrape") + assert.Equal(new_tweet.TombstoneType, "", "Tweet shouldn't be a tombstone anymore") + assert.Equal(new_tweet.Text, "Some text", "Should have created the text") +} + +/** + * The "unavailable" tombstone type is not reliable, you should be able to update away from it but + * not toward it + */ +func TestChangingTombstoningTweet(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) + + tweet := create_dummy_tweet() + tweet.TombstoneType = "unavailable" + tweet.IsStub = true + tweet.Text = "" + + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(err) + + // New tombstone type + tweet.TombstoneType = "hidden" + err = profile.SaveTweet(tweet) + require.NoError(err) + + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(err) + + assert.Equal(new_tweet.TombstoneType, "hidden", "Should be able to overwrite 'unavailable' tombstone") + + // Try to regress to the unreliable "unavailable" type; the stored type must be kept + new_tweet.TombstoneType = "unavailable" + err = profile.SaveTweet(new_tweet) + require.NoError(err) + + // Reload the tweet + new_tweet2, err := profile.GetTweetById(new_tweet.ID) + require.NoError(err) + + assert.Equal(new_tweet2.TombstoneType, "hidden", "'Unavailable' shouldn't clobber other tombstone types") } 
func TestModifyTweet(t *testing.T) { @@ -127,6 +208,7 @@ func TestModifyTweet(t *testing.T) { tweet.IsContentDownloaded = true tweet.IsConversationScraped = true tweet.LastScrapedAt = scraper.TimestampFromUnix(2000) + tweet.TombstoneType = "deleted" err = profile.SaveTweet(tweet) require.NoError(err) @@ -143,6 +225,7 @@ func TestModifyTweet(t *testing.T) { assert.True(new_tweet.IsContentDownloaded) assert.True(new_tweet.IsConversationScraped) assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix()) + assert.Equal(new_tweet.TombstoneType, "deleted") } /**