Fix handling of empty entries in the feed

- e.g., a retweet whose original tweet was later deleted by its author
- I think this is a bug in the Twitter API (it doesn't make sense to return this data), but we have to handle it anyway
This commit is contained in:
Alessio 2022-03-01 11:51:34 -08:00
parent 18eafe6e3d
commit aa961b9ff4
3 changed files with 40 additions and 21 deletions

View File

@ -152,9 +152,15 @@ type APIV2Result struct {
Tweet _Result `json:"tweet"` Tweet _Result `json:"tweet"`
} `json:"result"` } `json:"result"`
} }
func (api_result APIV2Result) ToTweetTrove() TweetTrove { func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
ret := NewTweetTrove() ret := NewTweetTrove()
// Start by checking if this is a null entry in a feed
if api_result.Result.Tombstone != nil && ignore_null_entries{
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
return ret
}
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 { if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
// If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with // If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with
// a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored // a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored
@ -194,7 +200,7 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove {
ret.TombstoneUsers = append(ret.TombstoneUsers, handle) ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
} }
quoted_trove := api_result.Result.QuotedStatusResult.ToTweetTrove() quoted_trove := quoted_api_result.ToTweetTrove(false)
ret.MergeWith(quoted_trove) ret.MergeWith(quoted_trove)
} }
@ -248,7 +254,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID // If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
if api_v2_tweet.RetweetedStatusResult != nil { if api_v2_tweet.RetweetedStatusResult != nil {
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove() orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
ret.MergeWith(orig_tweet_trove) ret.MergeWith(orig_tweet_trove)
@ -354,7 +360,7 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
result := entry.Content.ItemContent.TweetResults result := entry.Content.ItemContent.TweetResults
main_trove := result.ToTweetTrove() main_trove := result.ToTweetTrove(true)
ret.MergeWith(main_trove) ret.MergeWith(main_trove)
} }

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
@ -61,7 +62,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1485708879174508550] tweet, ok := trove.Tweets[1485708879174508550]
@ -108,7 +109,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
// Should be 2 tweets: quote-tweet and quoted-tweet // Should be 2 tweets: quote-tweet and quoted-tweet
assert.Equal(2, len(trove.Tweets)) assert.Equal(2, len(trove.Tweets))
@ -161,7 +162,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
// Should only be 1 tweet, the retweeted one // Should only be 1 tweet, the retweeted one
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
@ -221,7 +222,7 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
// Quoted tweet and quoting tweet // Quoted tweet and quoting tweet
assert.Equal(2, len(trove.Tweets)) assert.Equal(2, len(trove.Tweets))
@ -281,7 +282,7 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Equal(1, len(trove.Users)) assert.Equal(1, len(trove.Users))
user, ok := trove.Users[44067298] user, ok := trove.Users[44067298]
@ -317,7 +318,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1485695695025803264] tweet, ok := trove.Tweets[1485695695025803264]
@ -352,7 +353,7 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1485504913614327808] tweet, ok := trove.Tweets[1485504913614327808]
@ -384,7 +385,7 @@ func TestAPIV2ParseTweetWithURLRetweet(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1488605073588559873] tweet, ok := trove.Tweets[1488605073588559873]
@ -411,7 +412,7 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
assert.NoError(err) assert.NoError(err)
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Len(trove.Tweets, 1) assert.Len(trove.Tweets, 1)
tweet, ok := trove.Tweets[1485692111106285571] tweet, ok := trove.Tweets[1485692111106285571]
@ -541,26 +542,37 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
} }
/** /**
* Should handle an entry in the feed that's just a tombstone * Should handle an entry in the feed that's a tombstone by just ignoring it
* Expectation: random tombstones in the feed with no context should parse as empty TweetTroves.
*
* Calling 'ToTweetTrove(true)' marks the entry as coming from a feed (i.e., not a comments thread), so null entries are ignored.
* On a reply thread, the call would be 'ToTweetTrove(false)'.
*/ */
func TestAPIV2TombstoneEntry(t *testing.T) { func TestAPIV2TombstoneEntry(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/api_v2/tombstone_tweet.json")
require.NoError(t, err)
var tweet_result APIV2Result
err = json.Unmarshal(data, &tweet_result)
require.NoError(t, err)
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
assert.Len(trove.Tweets, 0)
assert.Len(trove.Users, 0)
assert.Len(trove.Retweets, 0)
} }
func TestTweetWithWarning(t *testing.T) { func TestTweetWithWarning(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_warning.json") data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_warning.json")
if err != nil { require.NoError(t, err)
panic(err)
}
var tweet_result APIV2Result var tweet_result APIV2Result
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
trove := tweet_result.ToTweetTrove() trove := tweet_result.ToTweetTrove(true)
assert.Len(trove.Retweets, 1) assert.Len(trove.Retweets, 1)
assert.Len(trove.Tweets, 2) assert.Len(trove.Tweets, 2)

View File

@ -0,0 +1 @@
{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Tweet was deleted by the Tweet author. Learn more","entities":[{"fromIndex":44,"toIndex":54,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}}