Fix handling of empty entries in the feed
- e.g., retweets where the author has since deleted the original tweet. I think this is a bug in the Twitter API (it doesn't make sense to return this data), but we have to handle it anyway.
This commit is contained in:
parent
18eafe6e3d
commit
aa961b9ff4
@ -152,9 +152,15 @@ type APIV2Result struct {
|
|||||||
Tweet _Result `json:"tweet"`
|
Tweet _Result `json:"tweet"`
|
||||||
} `json:"result"`
|
} `json:"result"`
|
||||||
}
|
}
|
||||||
func (api_result APIV2Result) ToTweetTrove() TweetTrove {
|
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
|
// Start by checking if this is a null entry in a feed
|
||||||
|
if api_result.Result.Tombstone != nil && ignore_null_entries{
|
||||||
|
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
|
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
|
||||||
// If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with
|
// If the tweet has "__typename" of "TweetWithVisibilityResults", it uses a new structure with
|
||||||
// a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored
|
// a "tweet" field with the regular data, alongside a "tweetInterstitial" field which is ignored
|
||||||
@ -194,7 +200,7 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove {
|
|||||||
ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
|
ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
|
||||||
}
|
}
|
||||||
|
|
||||||
quoted_trove := api_result.Result.QuotedStatusResult.ToTweetTrove()
|
quoted_trove := quoted_api_result.ToTweetTrove(false)
|
||||||
ret.MergeWith(quoted_trove)
|
ret.MergeWith(quoted_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,7 +254,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
|||||||
|
|
||||||
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
|
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
|
||||||
if api_v2_tweet.RetweetedStatusResult != nil {
|
if api_v2_tweet.RetweetedStatusResult != nil {
|
||||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove()
|
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
||||||
ret.MergeWith(orig_tweet_trove)
|
ret.MergeWith(orig_tweet_trove)
|
||||||
|
|
||||||
|
|
||||||
@ -354,7 +360,7 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
|||||||
|
|
||||||
result := entry.Content.ItemContent.TweetResults
|
result := entry.Content.ItemContent.TweetResults
|
||||||
|
|
||||||
main_trove := result.ToTweetTrove()
|
main_trove := result.ToTweetTrove(true)
|
||||||
ret.MergeWith(main_trove)
|
ret.MergeWith(main_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -61,7 +62,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485708879174508550]
|
tweet, ok := trove.Tweets[1485708879174508550]
|
||||||
@ -108,7 +109,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
// Should be 2 tweets: quote-tweet and quoted-tweet
|
// Should be 2 tweets: quote-tweet and quoted-tweet
|
||||||
assert.Equal(2, len(trove.Tweets))
|
assert.Equal(2, len(trove.Tweets))
|
||||||
@ -161,7 +162,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
// Should only be 1 tweet, the retweeted one
|
// Should only be 1 tweet, the retweeted one
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
@ -221,7 +222,7 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
// Quoted tweet and quoting tweet
|
// Quoted tweet and quoting tweet
|
||||||
assert.Equal(2, len(trove.Tweets))
|
assert.Equal(2, len(trove.Tweets))
|
||||||
@ -281,7 +282,7 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Users))
|
assert.Equal(1, len(trove.Users))
|
||||||
user, ok := trove.Users[44067298]
|
user, ok := trove.Users[44067298]
|
||||||
@ -317,7 +318,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485695695025803264]
|
tweet, ok := trove.Tweets[1485695695025803264]
|
||||||
@ -352,7 +353,7 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485504913614327808]
|
tweet, ok := trove.Tweets[1485504913614327808]
|
||||||
@ -384,7 +385,7 @@ func TestAPIV2ParseTweetWithURLRetweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1488605073588559873]
|
tweet, ok := trove.Tweets[1488605073588559873]
|
||||||
@ -411,7 +412,7 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Len(trove.Tweets, 1)
|
assert.Len(trove.Tweets, 1)
|
||||||
tweet, ok := trove.Tweets[1485692111106285571]
|
tweet, ok := trove.Tweets[1485692111106285571]
|
||||||
@ -541,26 +542,37 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should handle an entry in the feed that's just a tombstone
|
* Should handle an entry in the feed that's a tombstone by just ignoring it
|
||||||
|
* Expectation: random tombstones in the feed with no context should parse as empty TweetTroves.
|
||||||
|
*
|
||||||
|
* The indication that it's from a feed (i.e., not in a comments thread) is 'ToTweetTrove(true)'.
|
||||||
|
* On a reply thread, it would be 'ToTweetTrove(false)'.
|
||||||
*/
|
*/
|
||||||
func TestAPIV2TombstoneEntry(t *testing.T) {
|
func TestAPIV2TombstoneEntry(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := ioutil.ReadFile("test_responses/api_v2/tombstone_tweet.json")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var tweet_result APIV2Result
|
||||||
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
||||||
|
assert.Len(trove.Tweets, 0)
|
||||||
|
assert.Len(trove.Users, 0)
|
||||||
|
assert.Len(trove.Retweets, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestTweetWithWarning(t *testing.T) {
|
func TestTweetWithWarning(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var tweet_result APIV2Result
|
var tweet_result APIV2Result
|
||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
if err != nil {
|
require.NoError(t, err)
|
||||||
t.Errorf(err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove()
|
trove := tweet_result.ToTweetTrove(true)
|
||||||
|
|
||||||
assert.Len(trove.Retweets, 1)
|
assert.Len(trove.Retweets, 1)
|
||||||
assert.Len(trove.Tweets, 2)
|
assert.Len(trove.Tweets, 2)
|
||||||
|
1
scraper/test_responses/api_v2/tombstone_tweet.json
Normal file
1
scraper/test_responses/api_v2/tombstone_tweet.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Tweet was deleted by the Tweet author. Learn more","entities":[{"fromIndex":44,"toIndex":54,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}}
|
Loading…
x
Reference in New Issue
Block a user