diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go
index 31f5507..8a0d6e4 100644
--- a/scraper/api_types_v2.go
+++ b/scraper/api_types_v2.go
@@ -330,6 +330,14 @@ type InnerAPIV2Entry struct {
 	Content struct {
 		ItemContent ItemContent `json:"itemContent"`
 
+		Items []struct { // Child items of a module entry; iterated by ToTweetTrove for "TimelineTimelineModule" entries
+			EntryId     string // NOTE(review): no json tag; encoding/json accepts a case-insensitive key match, so "entryId" should still decode -- confirm
+			Dispensable bool
+			Item        struct {
+				ItemContent ItemContent `json:"itemContent"`
+			}
+		}
+
 		// Cursors
 		EntryType string `json:"entryType"`
 		Value string `json:"value"`
@@ -353,7 +361,26 @@ func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
 			panic(obj)
 		}
 	}()
-	return e.Content.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries)
+	if e.Content.EntryType == "TimelineTimelineCursor" {
+		// Cursor entries are skipped: they yield an empty trove
+		return NewTweetTrove()
+	} else if e.Content.EntryType == "TimelineTimelineModule" {
+		ret := NewTweetTrove()
+
+		// Only "homeConversation-" modules are parsed; any other module (e.g. "Who To Follow") is logged and yields an empty trove
+		if !strings.HasPrefix(e.EntryID, "homeConversation-") {
+			log.Warn("Skipping entry with EntryID " + e.EntryID)
+			return ret
+		}
+
+		for _, item := range e.Content.Items {
+			ret.MergeWith(item.Item.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries))
+		}
+		return ret
+	} else if e.Content.EntryType == "TimelineTimelineItem" {
+		return e.Content.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries)
+	}
+	panic("Unknown EntryType: " + e.Content.EntryType) // Any entry type not handled above is treated as fatal
 }
 
 type APIV2Instruction struct {
@@ -413,10 +440,6 @@ func (api_response APIV2Response) IsEmpty() bool {
 func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
 	ret := NewTweetTrove()
 	for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
-		if !strings.HasPrefix(entry.EntryID, "tweet-") {
-			continue
-		}
-
 		main_trove := entry.ToTweetTrove(true)
 		ret.MergeWith(main_trove)
 	}
diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go
index 4ba313a..0fc2f94 100644
--- a/scraper/api_types_v2_test.go
+++ 
b/scraper/api_types_v2_test.go
@@ -636,3 +636,34 @@ func TestRetweetWithVisibilityResults(t *testing.T) {
 	require.True(is_ok)
 	assert.Equal(rt.TweetID, TweetID(1595973736833892356))
 }
+
+// TestEntryWithConversationThread checks that one feed "entry" holding several tweets
+// (seen in user feeds when making authenticated requests) parses out as all of those tweets.
+func TestEntryWithConversationThread(t *testing.T) {
+	assert := assert.New(t)
+	require := require.New(t)
+
+	data, err := os.ReadFile("test_responses/api_v2/home_conversation_entry.json")
+	require.NoError(err)
+	var entry_result APIV2Entry
+	err = json.Unmarshal(data, &entry_result)
+	require.NoError(err)
+
+	trove := entry_result.ToTweetTrove(true)
+
+	assert.Len(trove.Tweets, 4) // 3 tweets in the thread plus the quoted tweet
+
+	// Abort if the first tweet is missing; otherwise the field checks below would run
+	// against a zero-valued tweet and only add noise to the failure output.
+	t1, is_ok := trove.Tweets[1624966566264680448]
+	require.True(is_ok, "first tweet in the thread is missing")
+	assert.Equal(TweetID(1624953636253208578), t1.QuotedTweetID)
+	assert.Equal(13, t1.NumLikes)
+
+	_, is_ok = trove.Tweets[1624953636253208578] // Quoted tweet
+	assert.True(is_ok, "quoted tweet is missing")
+	_, is_ok = trove.Tweets[1624989022589427712] // Tweet 2
+	assert.True(is_ok, "tweet 2 is missing")
+	_, is_ok = trove.Tweets[1624990170670850053] // Tweet 3
+	assert.True(is_ok, "tweet 3 is missing")
+}