Fix handling of empty entries in the feed
- e.g., retweets where the author has since deleted the original tweet. I think this is a bug in the Twitter API (it doesn't make sense to return this data), but we have to handle it anyway.
This commit is contained in:
parent
18eafe6e3d
commit
aa961b9ff4
@ -152,9 +152,15 @@ type APIV2Result struct {
|
||||
Tweet _Result `json:"tweet"`
|
||||
} `json:"result"`
|
||||
}
|
||||
func (api_result APIV2Result) ToTweetTrove() TweetTrove {
|
||||
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
||||
ret := NewTweetTrove()
|
||||
|
||||
// Start by checking if this is a null entry in a feed
|
||||
if api_result.Result.Tombstone != nil && ignore_null_entries{
|
||||
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
||||
return ret
|
||||
}
|
||||
|
||||
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
|
||||
// If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with
|
||||
// a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored
|
||||
@ -194,7 +200,7 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove {
|
||||
ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
|
||||
}
|
||||
|
||||
quoted_trove := api_result.Result.QuotedStatusResult.ToTweetTrove()
|
||||
quoted_trove := quoted_api_result.ToTweetTrove(false)
|
||||
ret.MergeWith(quoted_trove)
|
||||
}
|
||||
|
||||
@ -248,7 +254,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
||||
|
||||
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
|
||||
if api_v2_tweet.RetweetedStatusResult != nil {
|
||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove()
|
||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
||||
ret.MergeWith(orig_tweet_trove)
|
||||
|
||||
|
||||
@ -354,7 +360,7 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||
|
||||
result := entry.Content.ItemContent.TweetResults
|
||||
|
||||
main_trove := result.ToTweetTrove()
|
||||
main_trove := result.ToTweetTrove(true)
|
||||
ret.MergeWith(main_trove)
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
@ -61,7 +62,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
tweet, ok := trove.Tweets[1485708879174508550]
|
||||
@ -108,7 +109,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
// Should be 2 tweets: quote-tweet and quoted-tweet
|
||||
assert.Equal(2, len(trove.Tweets))
|
||||
@ -161,7 +162,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
// Should only be 1 tweet, the retweeted one
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
@ -221,7 +222,7 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
// Quoted tweet and quoting tweet
|
||||
assert.Equal(2, len(trove.Tweets))
|
||||
@ -281,7 +282,7 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Equal(1, len(trove.Users))
|
||||
user, ok := trove.Users[44067298]
|
||||
@ -317,7 +318,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
tweet, ok := trove.Tweets[1485695695025803264]
|
||||
@ -352,7 +353,7 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
tweet, ok := trove.Tweets[1485504913614327808]
|
||||
@ -384,7 +385,7 @@ func TestAPIV2ParseTweetWithURLRetweet(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
tweet, ok := trove.Tweets[1488605073588559873]
|
||||
@ -411,7 +412,7 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
assert.NoError(err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Len(trove.Tweets, 1)
|
||||
tweet, ok := trove.Tweets[1485692111106285571]
|
||||
@ -541,26 +542,37 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Should handle an entry in the feed that's just a tombstone
|
||||
* Should handle an entry in the feed that's a tombstone by just ignoring it
|
||||
* Expectation: random tombstones in the feed with no context should parse as empty TweetTroves.
|
||||
*
|
||||
* The indication that it's from a feed (i.e., not in a comments thread) is 'ToTweetTrove(true)'.
|
||||
* On a reply thread, it would be 'ToTweetTrove(false)'.
|
||||
*/
|
||||
func TestAPIV2TombstoneEntry(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := ioutil.ReadFile("test_responses/api_v2/tombstone_tweet.json")
|
||||
require.NoError(t, err)
|
||||
|
||||
var tweet_result APIV2Result
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
||||
assert.Len(trove.Tweets, 0)
|
||||
assert.Len(trove.Users, 0)
|
||||
assert.Len(trove.Retweets, 0)
|
||||
}
|
||||
|
||||
|
||||
func TestTweetWithWarning(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
var tweet_result APIV2Result
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove()
|
||||
trove := tweet_result.ToTweetTrove(true)
|
||||
|
||||
assert.Len(trove.Retweets, 1)
|
||||
assert.Len(trove.Tweets, 2)
|
||||
|
1
scraper/test_responses/api_v2/tombstone_tweet.json
Normal file
1
scraper/test_responses/api_v2/tombstone_tweet.json
Normal file
@ -0,0 +1 @@
|
||||
{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Tweet was deleted by the Tweet author. Learn more","entities":[{"fromIndex":44,"toIndex":54,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}}
|
Loading…
x
Reference in New Issue
Block a user