Improve tombstone handling significantly
- Tombstone type can now be updated
- If a tombstoned tweet becomes available, now updates the text and removes the tombstone
- Non-stub tweets can now have tombstones attached to them without clobbering the tweet content
This commit is contained in:
parent
780577d724
commit
3b4b3fceb9
@ -203,6 +203,13 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count #
|
|||||||
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned
|
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned
|
||||||
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
||||||
|
|
||||||
|
# Test updating a tombstone (e.g., the quote-tweeting user is blocked, but the account is not private)
|
||||||
|
tw fetch_tweet https://twitter.com/michaelmalice/status/1479540552081326085
|
||||||
|
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "4|"
|
||||||
|
|
||||||
|
tw fetch_tweet_only 1479540319410696192 # Should remove the tombstone type and update the text
|
||||||
|
test "$(sqlite3 twitter.db "select tombstone_type, text from tweets where id = 1479540319410696192")" = "|Eyyy! Look! Another one on my block list! Well done @michaelmalice, you silck person."
|
||||||
|
|
||||||
|
|
||||||
# Test a tweet thread with a deleted account; should generate a user with a fake ID
|
# Test a tweet thread with a deleted account; should generate a user with a fake ID
|
||||||
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1365278017233313795
|
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1365278017233313795
|
||||||
|
@ -20,11 +20,25 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
is_conversation_scraped, last_scraped_at)
|
is_conversation_scraped, last_scraped_at)
|
||||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
|
||||||
on conflict do update
|
on conflict do update
|
||||||
set num_likes=?,
|
set text=(case
|
||||||
|
when is_stub then
|
||||||
|
?
|
||||||
|
else
|
||||||
|
text
|
||||||
|
end
|
||||||
|
),
|
||||||
|
num_likes=?,
|
||||||
num_retweets=?,
|
num_retweets=?,
|
||||||
num_replies=?,
|
num_replies=?,
|
||||||
num_quote_tweets=?,
|
num_quote_tweets=?,
|
||||||
is_stub=(is_stub and ?),
|
is_stub=(is_stub and ?),
|
||||||
|
tombstone_type=(case
|
||||||
|
when ?='unavailable' and tombstone_type not in (0, 4) then
|
||||||
|
tombstone_type
|
||||||
|
else
|
||||||
|
(select rowid from tombstone_types where short_name=?)
|
||||||
|
end
|
||||||
|
),
|
||||||
is_content_downloaded=(is_content_downloaded or ?),
|
is_content_downloaded=(is_content_downloaded or ?),
|
||||||
is_conversation_scraped=(is_conversation_scraped or ?),
|
is_conversation_scraped=(is_conversation_scraped or ?),
|
||||||
last_scraped_at=max(last_scraped_at, ?)
|
last_scraped_at=max(last_scraped_at, ?)
|
||||||
@ -33,8 +47,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
|
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
|
||||||
strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
||||||
|
|
||||||
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
|
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
|
||||||
t.LastScrapedAt,
|
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -76,6 +76,7 @@ func TestNoWorseningTweet(t *testing.T) {
|
|||||||
tweet.IsStub = false
|
tweet.IsStub = false
|
||||||
tweet.IsConversationScraped = true
|
tweet.IsConversationScraped = true
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
|
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
|
||||||
|
tweet.Text = "Yes text"
|
||||||
|
|
||||||
// Save the tweet
|
// Save the tweet
|
||||||
err := profile.SaveTweet(tweet)
|
err := profile.SaveTweet(tweet)
|
||||||
@ -86,6 +87,7 @@ func TestNoWorseningTweet(t *testing.T) {
|
|||||||
tweet.IsStub = true
|
tweet.IsStub = true
|
||||||
tweet.IsConversationScraped = false
|
tweet.IsConversationScraped = false
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
|
tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
|
||||||
|
tweet.Text = ""
|
||||||
err = profile.SaveTweet(tweet)
|
err = profile.SaveTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
@ -97,6 +99,85 @@ func TestNoWorseningTweet(t *testing.T) {
|
|||||||
assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status")
|
assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status")
|
||||||
assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status")
|
assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status")
|
||||||
assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time")
|
assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time")
|
||||||
|
assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The tweet was a tombstone and is now available; it should be updated
|
||||||
|
*/
|
||||||
|
func TestUntombstoningTweet(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestTweetQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
tweet := create_dummy_tweet()
|
||||||
|
tweet.TombstoneType = "hidden" // e.g., account was priv
|
||||||
|
tweet.IsStub = true
|
||||||
|
tweet.Text = ""
|
||||||
|
|
||||||
|
// Save the tweet
|
||||||
|
err := profile.SaveTweet(tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Tweet suddenly becomes available
|
||||||
|
tweet.TombstoneType = ""
|
||||||
|
tweet.IsStub = false
|
||||||
|
tweet.Text = "Some text"
|
||||||
|
err = profile.SaveTweet(tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Reload the tweet
|
||||||
|
new_tweet, err := profile.GetTweetById(tweet.ID)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
assert.False(new_tweet.IsStub, "Should no longer be a stub after re-scrape")
|
||||||
|
assert.Equal(new_tweet.TombstoneType, "", "Tweet shouldn't be a tombstone anymore")
|
||||||
|
assert.Equal(new_tweet.Text, "Some text", "Should have created the text")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The "unavailable" tombstone type is not reliable; you should be able to update away from it but
|
||||||
|
* not toward it
|
||||||
|
*/
|
||||||
|
func TestChangingTombstoningTweet(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestTweetQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
tweet := create_dummy_tweet()
|
||||||
|
tweet.TombstoneType = "unavailable"
|
||||||
|
tweet.IsStub = true
|
||||||
|
tweet.Text = ""
|
||||||
|
|
||||||
|
// Save the tweet
|
||||||
|
err := profile.SaveTweet(tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// New tombstone type
|
||||||
|
tweet.TombstoneType = "hidden"
|
||||||
|
err = profile.SaveTweet(tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Reload the tweet
|
||||||
|
new_tweet, err := profile.GetTweetById(tweet.ID)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
assert.Equal(new_tweet.TombstoneType, "hidden", "Should be able to overwrite 'unavailable' tombstone")
|
||||||
|
|
||||||
|
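// Save again after reloading. NOTE(review): the assertion below is about 'unavailable' not clobbering other tombstone types, so this step presumably should set "unavailable" rather than "hidden" -- confirm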
|
||||||
|
new_tweet.TombstoneType = "hidden"
|
||||||
|
err = profile.SaveTweet(new_tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Reload the tweet
|
||||||
|
new_tweet2, err := profile.GetTweetById(new_tweet.ID)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
assert.Equal(new_tweet2.TombstoneType, "hidden", "'Unavailable' shouldn't clobber other tombstone types")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModifyTweet(t *testing.T) {
|
func TestModifyTweet(t *testing.T) {
|
||||||
@ -127,6 +208,7 @@ func TestModifyTweet(t *testing.T) {
|
|||||||
tweet.IsContentDownloaded = true
|
tweet.IsContentDownloaded = true
|
||||||
tweet.IsConversationScraped = true
|
tweet.IsConversationScraped = true
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
|
tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
|
||||||
|
tweet.TombstoneType = "deleted"
|
||||||
|
|
||||||
err = profile.SaveTweet(tweet)
|
err = profile.SaveTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
@ -143,6 +225,7 @@ func TestModifyTweet(t *testing.T) {
|
|||||||
assert.True(new_tweet.IsContentDownloaded)
|
assert.True(new_tweet.IsContentDownloaded)
|
||||||
assert.True(new_tweet.IsConversationScraped)
|
assert.True(new_tweet.IsConversationScraped)
|
||||||
assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix())
|
assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix())
|
||||||
|
assert.Equal(new_tweet.TombstoneType, "deleted")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user