Improve tombstone handling significantly

- Tombstone type can now be updated
- If a tombstoned tweet becomes available, now updates the text and removes the tombstone
- Non-stub tweets can now have tombstones attached to them without clobbering the tweet content
This commit is contained in:
Alessio 2022-05-07 16:47:46 -07:00
parent 780577d724
commit 3b4b3fceb9
3 changed files with 107 additions and 3 deletions

View File

@ -203,6 +203,13 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count #
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned # test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1 # test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
# Test updating a tombstone (e.g., the QT-ing user is blocked but acct is not priv)
tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "4|"
tw fetch_tweet_only 1479540319410696192 # Should remove the tombstone type and update the text
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person."
# Test a tweet thread with a deleted account; should generate a user with a fake ID # Test a tweet thread with a deleted account; should generate a user with a fake ID
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1365278017233313795 tw fetch_tweet https://twitter.com/CovfefeAnon/status/1365278017233313795

View File

@ -20,11 +20,25 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
is_conversation_scraped, last_scraped_at) is_conversation_scraped, last_scraped_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
on conflict do update on conflict do update
set num_likes=?, set text=(case
when is_stub then
?
else
text
end
),
num_likes=?,
num_retweets=?, num_retweets=?,
num_replies=?, num_replies=?,
num_quote_tweets=?, num_quote_tweets=?,
is_stub=(is_stub and ?), is_stub=(is_stub and ?),
tombstone_type=(case
when ?='unavailable' and tombstone_type not in (0, 4) then
tombstone_type
else
(select rowid from tombstone_types where short_name=?)
end
),
is_content_downloaded=(is_content_downloaded or ?), is_content_downloaded=(is_content_downloaded or ?),
is_conversation_scraped=(is_conversation_scraped or ?), is_conversation_scraped=(is_conversation_scraped or ?),
last_scraped_at=max(last_scraped_at, ?) last_scraped_at=max(last_scraped_at, ?)
@ -33,8 +47,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
t.LastScrapedAt, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
) )
if err != nil { if err != nil {

View File

@ -76,6 +76,7 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.IsStub = false tweet.IsStub = false
tweet.IsConversationScraped = true tweet.IsConversationScraped = true
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000) tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
tweet.Text = "Yes text"
// Save the tweet // Save the tweet
err := profile.SaveTweet(tweet) err := profile.SaveTweet(tweet)
@ -86,6 +87,7 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.IsStub = true tweet.IsStub = true
tweet.IsConversationScraped = false tweet.IsConversationScraped = false
tweet.LastScrapedAt = scraper.TimestampFromUnix(500) tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
tweet.Text = ""
err = profile.SaveTweet(tweet) err = profile.SaveTweet(tweet)
require.NoError(err) require.NoError(err)
@ -97,6 +99,85 @@ func TestNoWorseningTweet(t *testing.T) {
assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status") assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status")
assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status")
assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time")
assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable")
}
/**
 * The tweet was saved as a tombstoned stub and later becomes available; re-saving it should
 * clear the stub flag, remove the tombstone type and fill in the text.
 */
func TestUntombstoningTweet(t *testing.T) {
	assert := assert.New(t)
	require := require.New(t)
	profile_path := "test_profiles/TestTweetQueries"
	profile := create_or_load_profile(profile_path)

	// Start from a stub with a tombstone and no content
	tweet := create_dummy_tweet()
	tweet.TombstoneType = "hidden" // e.g., account was priv
	tweet.IsStub = true
	tweet.Text = ""

	// Save the tweet
	err := profile.SaveTweet(tweet)
	require.NoError(err)

	// Tweet suddenly becomes available
	tweet.TombstoneType = ""
	tweet.IsStub = false
	tweet.Text = "Some text"
	err = profile.SaveTweet(tweet)
	require.NoError(err)

	// Reload the tweet
	new_tweet, err := profile.GetTweetById(tweet.ID)
	require.NoError(err)

	// Expected value goes first so that failure output labels the values correctly
	assert.False(new_tweet.IsStub, "Should no longer be a stub after re-scrape")
	assert.Equal("", new_tweet.TombstoneType, "Tweet shouldn't be a tombstone anymore")
	assert.Equal("Some text", new_tweet.Text, "Should have created the text")
}
/**
* The "unavailable" tombstone type is not reliable, you should be able to update away from it but
* not toward it
*/
func TestChangingTombstoningTweet(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
profile_path := "test_profiles/TestTweetQueries"
profile := create_or_load_profile(profile_path)
tweet := create_dummy_tweet()
tweet.TombstoneType = "unavailable"
tweet.IsStub = true
tweet.Text = ""
// Save the tweet
err := profile.SaveTweet(tweet)
require.NoError(err)
// New tombstone type
tweet.TombstoneType = "hidden"
err = profile.SaveTweet(tweet)
require.NoError(err)
// Reload the tweet
new_tweet, err := profile.GetTweetById(tweet.ID)
require.NoError(err)
assert.Equal(new_tweet.TombstoneType, "hidden", "Should be able to overwrite 'unavailable' tombstone")
// New tombstone type
new_tweet.TombstoneType = "hidden"
err = profile.SaveTweet(new_tweet)
require.NoError(err)
// Reload the tweet
new_tweet2, err := profile.GetTweetById(new_tweet.ID)
require.NoError(err)
assert.Equal(new_tweet2.TombstoneType, "hidden", "'Unavailable' shouldn't clobber other tombstone types")
} }
func TestModifyTweet(t *testing.T) { func TestModifyTweet(t *testing.T) {
@ -127,6 +208,7 @@ func TestModifyTweet(t *testing.T) {
tweet.IsContentDownloaded = true tweet.IsContentDownloaded = true
tweet.IsConversationScraped = true tweet.IsConversationScraped = true
tweet.LastScrapedAt = scraper.TimestampFromUnix(2000) tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
tweet.TombstoneType = "deleted"
err = profile.SaveTweet(tweet) err = profile.SaveTweet(tweet)
require.NoError(err) require.NoError(err)
@ -143,6 +225,7 @@ func TestModifyTweet(t *testing.T) {
assert.True(new_tweet.IsContentDownloaded) assert.True(new_tweet.IsContentDownloaded)
assert.True(new_tweet.IsConversationScraped) assert.True(new_tweet.IsConversationScraped)
assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix()) assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix())
assert.Equal(new_tweet.TombstoneType, "deleted")
} }
/** /**