Scraper now handles implicit tombstones like '"quoted_status_result": {}'
This commit is contained in:
parent
535f28c278
commit
63ddaaeafb
@ -143,14 +143,15 @@ func (u APIV2UserResult) ToUser() User {
|
||||
return user
|
||||
}
|
||||
|
||||
type _Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
Tombstone *struct {
|
||||
type Tombstone struct {
|
||||
Text struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"text"`
|
||||
} `json:"tombstone"`
|
||||
}
|
||||
type _Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
Tombstone *Tombstone `json:"tombstone"`
|
||||
Core *APIV2UserResult `json:"core"`
|
||||
Card APIV2Card `json:"card"`
|
||||
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
|
||||
@ -213,6 +214,16 @@ func (api_result APIV2Result) ToTweetTrove() (TweetTrove, error) {
|
||||
quoted_api_result := api_result.Result.QuotedStatusResult
|
||||
quoted_trove, err := quoted_api_result.ToTweetTrove()
|
||||
|
||||
// Handle `"quoted_status_result": {}` results
|
||||
if errors.Is(err, ERR_NO_TWEET) {
|
||||
// Replace it with a tombstone
|
||||
err = ErrorIsTombstone
|
||||
if quoted_api_result.Result.Tombstone == nil {
|
||||
quoted_api_result.Result.Tombstone = &Tombstone{}
|
||||
}
|
||||
quoted_api_result.Result.Tombstone.Text.Text = "This Post is unavailable. Learn more"
|
||||
}
|
||||
|
||||
// Quoted tombstones can be handled here since we already have the ID and user handle
|
||||
if errors.Is(err, ErrorIsTombstone) {
|
||||
tombstoned_tweet := quoted_api_result.Result.Legacy.APITweet
|
||||
@ -256,9 +267,9 @@ func (api_result APIV2Result) ToTweetTrove() (TweetTrove, error) {
|
||||
if api_result.Result.Legacy.RetweetedStatusResult == nil {
|
||||
// We have to filter out retweets. For some reason, retweets have a copy of the card in both the retweeting
|
||||
// and the retweeted TweetResults; it should only be parsed for the real Tweet, not the Retweet
|
||||
main_tweet, is_ok := ret.Tweets[TweetID(api_result.Result.Legacy.ID)]
|
||||
main_tweet, is_ok := ret.Tweets[TweetID(api_result.Result.ID)]
|
||||
if !is_ok {
|
||||
panic(fmt.Errorf("Tweet trove didn't contain its own tweet with ID %d:\n %w", api_result.Result.Legacy.ID, EXTERNAL_API_ERROR))
|
||||
return TweetTrove{}, ERR_NO_TWEET
|
||||
}
|
||||
if api_result.Result.Card.Legacy.Name == "summary_large_image" || api_result.Result.Card.Legacy.Name == "player" {
|
||||
url := api_result.Result.Card.ParseAsUrl()
|
||||
@ -595,9 +606,7 @@ func (api_response APIV2Response) GetCursorBottom() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns `true` if there's no non-cursor entries in this response, false otherwise
|
||||
*/
|
||||
// Returns `true` if there are no non-cursor entries in this response, false otherwise
|
||||
func (api_response APIV2Response) IsEmpty() bool {
|
||||
for _, e := range api_response.GetMainInstruction().Entries {
|
||||
if !strings.Contains(e.EntryID, "cursor") {
|
||||
@ -607,9 +616,7 @@ func (api_response APIV2Response) IsEmpty() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the collected API response and turn it into a TweetTrove
|
||||
*/
|
||||
// Parse the collected API response and turn it into a TweetTrove
|
||||
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||
ret := NewTweetTrove()
|
||||
|
||||
|
@ -595,7 +595,6 @@ func TestAPIV2UserFeedTombstoneEntry(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := entry.ToTweetTrove()
|
||||
assert.NoError(err)
|
||||
// assert.Len(trove.Tweets, 1)
|
||||
// assert.Len(trove.Users, 1)
|
||||
assert.Len(trove.Retweets, 0)
|
||||
@ -917,3 +916,25 @@ func TestParseResultAsLikes(t *testing.T) {
|
||||
assert.True(is_ok, "Like (%#v) didn't have its Tweet in the trove", l)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTweetWithImplicitQuotedTombstone(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
data, err := os.ReadFile("test_responses/api_v2/tweet_with_implicit_quoted_tombstone.json")
|
||||
require.NoError(err)
|
||||
var entry_result APIV2Entry
|
||||
err = json.Unmarshal(data, &entry_result)
|
||||
require.NoError(err)
|
||||
|
||||
trove := entry_result.ToTweetTrove()
|
||||
|
||||
assert.Len(trove.Tweets, 2)
|
||||
|
||||
t1, is_ok := trove.Tweets[TweetID(1586033916367904768)]
|
||||
assert.True(is_ok)
|
||||
assert.False(t1.IsStub)
|
||||
t2, is_ok := trove.Tweets[TweetID(1586033437806305280)]
|
||||
assert.True(is_ok)
|
||||
assert.True(t2.IsStub)
|
||||
assert.Equal(t2.TombstoneType, "unavailable")
|
||||
}
|
||||
|
@ -0,0 +1 @@
|
||||
{"entryId":"tweet-1586033916367904768","sortIndex":"1713285565408870280","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1586033916367904768","core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"blocked_by":false,"blocking":false,"follow_request_sent":false,"followed_by":false,"following":false,"muting":false,"notifications":false,"protected":false,"can_dm":false,"can_media_tag":false,"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author: Dear Reader, The New Right, The Anarchist Handbook & The White Pill \nHost: \"YOUR WELCOME\" \nSubject: Ego & Hubris by Harvey Pekar\nHe/Him ⚑","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":4831,"followers_count":658993,"friends_count":1040,"has_custom_timelines":true,"is_translator":false,"listed_count":2221,"location":"Austin","media_count":13701,"name":"Michael 
Malice","normal_followers_count":658993,"pinned_tweet_ids_str":["1712211529905353086"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1664774013","profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg","profile_interstitial_type":"","screen_name":"michaelmalice","statuses_count":162731,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":false,"want_retweets":false,"withheld_in_countries":[]},"has_nft_avatar":false,"super_follow_eligible":false,"super_followed_by":false,"super_following":false}}},"edit_control":{"edit_tweet_ids":["1586033916367904768"],"editable_until_msecs":"1666976727000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"source":"<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>","quoted_status_result":{},"legacy":{"bookmark_count":3,"bookmarked":false,"created_at":"Fri Oct 28 16:35:27 +0000 2022","conversation_id_str":"1586033916367904768","display_text_range":[0,65],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":436,"favorited":false,"full_text":"the Constitution does not grant rights, it claims to protect them","is_quote_status":true,"lang":"en","quote_count":1,"quoted_status_id_str":"1586033437806305280","quoted_status_permalink":{"url":"https://t.co/r6ZFruksos","expanded":"https://twitter.com/SloCobruh/status/1586033437806305280","display":"twitter.com/SloCobruh/stat…"},"reply_count":16,"retweet_count":27,"retweeted":false,"user_id_str":"44067298","id_str":"1586033916367904768"}}},"tweetDisplayType":"Tweet","highlights":{"textHighlights":[{"startIndex":4,"endIndex":16}]}},"feedbackInfo":{"feedbackKeys":["1060665035"]},"clientEventInfo":{"component":"result","element":"tweet","details":{"timelinesDetails":{"controllerData":"DAACDAAFDAABDAABDAABCgABAAAAAAAAAAAAAAwAAgoAAQAAAAAAAABACgAC+2hCBb/AmpQKAAVD6ekwp16RtQgABgAAAAAKAAdChvNiX2bzlgAAAAAA"}}}}}
|
@ -131,7 +131,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
||||
if err != nil {
|
||||
if ret.ID == 0 {
|
||||
return Tweet{}, fmt.Errorf("unable to parse tweet:\n %w", ERR_NO_TWEET)
|
||||
return Tweet{}, fmt.Errorf("unable to parse tweet: %w", ERR_NO_TWEET)
|
||||
}
|
||||
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user