Add support for parsing Tweet Detail in APIv2, including its unusual cursor format and conversation threads

This commit is contained in:
Alessio 2023-06-08 23:19:50 -03:00
parent 693831704d
commit 21581b325a
3 changed files with 62 additions and 14 deletions

View File

@ -334,6 +334,10 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
// ItemContent holds the fields decoded from a timeline item's content object. It covers two
// shapes of item: one carrying a tweet result, and one carrying a cursor in the format used by
// the conversation view.
type ItemContent struct {
// Decoded from the JSON key "itemType". The parsing code compares it against
// "TimelineTimelineCursor" to recognize cursor items.
ItemType string `json:"itemType"`
// Decoded from the JSON key "tweet_results".
TweetResults APIV2Result `json:"tweet_results"`
// Cursors (conversation view format)
// `CursorType` is compared against "Bottom" when looking for the bottom cursor, and `Value`
// is the string that gets returned for it.
CursorType string `json:"cursorType"`
Value string `json:"value"`
}
// Wraps InnerAPIV2Entry to implement `json.Unmarshal`. Does the normal unmarshal but also saves the original JSON.
@ -355,7 +359,7 @@ type InnerAPIV2Entry struct {
}
}
// Cursors
// Cursors (user feed format)
EntryType string `json:"entryType"`
Value string `json:"value"`
CursorType string `json:"cursorType"`
@ -378,22 +382,29 @@ func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
panic(obj)
}
}()
if e.Content.EntryType == "TimelineTimelineCursor" {
// Ignore cursor entries
if e.Content.EntryType == "TimelineTimelineCursor" || e.Content.ItemContent.ItemType == "TimelineTimelineCursor" {
// Ignore cursor entries.
// - e.Content.EntryType -> User Feed itself
// - e.Content.ItemContent.ItemType -> conversation thread in a user feed
return NewTweetTrove()
} else if e.Content.EntryType == "TimelineTimelineModule" {
ret := NewTweetTrove()
switch strings.Split(e.EntryID, "-")[0] {
case "homeConversation":
// Process it
case "homeConversation", "conversationthread":
// Process it.
// - "homeConversation": conversation thread on a user feed
// - "conversationthread": conversation thread in the replies under a TweetDetail view
for _, item := range e.Content.Items {
if item.Item.ItemContent.ItemType == "TimelineTimelineCursor" {
// "Show More" replies button in a thread on Tweet Detail page
continue
}
ret.MergeWith(item.Item.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries))
}
case "whoToFollow":
case "TopicsModule":
// Ignore "Who to follow" and "Topics" modules.
case "whoToFollow", "TopicsModule", "tweetdetailrelatedtweets":
// Ignore "Who to follow", "Topics" and "Related Tweets" modules.
// TODO: maybe we can capture these eventually
log.Debug(fmt.Sprintf("Skipping %s entry", e.EntryID))
@ -424,6 +435,9 @@ type APIV2Response struct {
} `json:"timeline"`
} `json:"result"`
} `json:"user"`
ThreadedConversationWithInjectionsV2 struct {
Instructions []APIV2Instruction `json:"instructions"`
} `json:"threaded_conversation_with_injections_v2"`
} `json:"data"`
}
@ -434,17 +448,28 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
return &instructions[i]
}
}
instructions = api_response.Data.ThreadedConversationWithInjectionsV2.Instructions
for i := range instructions {
if instructions[i].Type == "TimelineAddEntries" {
return &instructions[i]
}
}
panic("No 'TimelineAddEntries' found")
}
// GetCursorBottom walks the entries of the instruction returned by `GetMainInstruction` and
// returns the value of the first cursor whose type is "Bottom". Each entry is checked in two
// places: `Content.CursorType` (user feed format) and `Content.ItemContent.CursorType` (Tweet
// Detail format). If no entry carries a bottom cursor, the empty string is returned.
//
// NOTE(review): this listing is a diff hunk whose +/- markers were stripped, so removed and
// added lines are interleaved. The statements that use `entries` / `last_entry` (including the
// "No bottom cursor found" panic) appear to be the removed version -- confirm against the
// repository before relying on this text.
func (api_response APIV2Response) GetCursorBottom() string {
entries := api_response.GetMainInstruction().Entries
last_entry := entries[len(entries)-1]
if last_entry.Content.CursorType != "Bottom" {
panic("No bottom cursor found")
for _, entry := range api_response.GetMainInstruction().Entries {
// For a user feed:
if entry.Content.CursorType == "Bottom" {
return entry.Content.Value
}
return last_entry.Content.Value
// For a Tweet Detail page:
if entry.Content.ItemContent.CursorType == "Bottom" {
return entry.Content.ItemContent.Value
}
}
return ""
}
/**

View File

@ -680,3 +680,25 @@ func TestEntryWithConversationThread(t *testing.T) {
_, is_ok = trove.Tweets[1624990170670850053] // Tweet 3
assert.True(is_ok)
}
// A reply thread on a Tweet Detail page can end with its own "Show more..." cursor item, which
// extends that particular thread.  It is not the same thing as the "Show more..." button that
// loads further replies to the original tweet.  The cursor item must be skipped, leaving only
// the tweet (and its author) from the thread.
func TestConversationThreadEntryWithShowMoreButton(t *testing.T) {
	assert := assert.New(t)
	require := require.New(t)

	raw_json, read_err := os.ReadFile("test_responses/api_v2/conversation_thread_entry_with_show_more_button.json")
	require.NoError(read_err)

	var entry APIV2Entry
	require.NoError(json.Unmarshal(raw_json, &entry))

	result := entry.ToTweetTrove(true)

	// Exactly one tweet comes out of the module, and it is a reply
	assert.Len(result.Tweets, 1)
	reply, was_found := result.Tweets[1649803385485377536]
	assert.True(was_found)
	assert.Equal(TweetID(1649600354747572225), reply.InReplyToID)

	// ...along with exactly one user
	assert.Len(result.Users, 1)
}

View File

@ -0,0 +1 @@
{"entryId":"conversationthread-1649803385485377536","sortIndex":"7573771682107203542","content":{"entryType":"TimelineTimelineModule","__typename":"TimelineTimelineModule","items":[{"entryId":"conversationthread-1649803385485377536-tweet-1649803385485377536","item":{"itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"Tweet","rest_id":"1649803385485377536","has_birdwatch_notes":false,"core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjoxNDAzMTAzMg==","rest_id":"14031032","affiliates_highlighted_label":{},"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"created_at":"Tue Feb 26 22:01:28 +0000 2008","default_profile":false,"default_profile_image":false,"description":"VP marketing @AdQuick, out of home advertising made simple & measurable. Prev work: Google, Invitae, Marketo, etc","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"adamsinger.substack.com/welcome","expanded_url":"https://adamsinger.substack.com/welcome","url":"https://t.co/6kI9bzQ2eV","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":355788,"followers_count":81891,"friends_count":4200,"has_custom_timelines":true,"is_translator":false,"listed_count":3083,"location":"Austin, TX","media_count":31104,"name":"Adam 
Singer","normal_followers_count":81891,"pinned_tweet_ids_str":[],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/14031032/1663584125","profile_image_url_https":"https://pbs.twimg.com/profile_images/1526507327574220804/vDv7S4U7_normal.jpg","profile_interstitial_type":"","screen_name":"AdamSinger","statuses_count":313313,"translator_type":"none","url":"https://t.co/6kI9bzQ2eV","verified":false,"withheld_in_countries":[]}}}},"edit_control":{"edit_tweet_ids":["1649803385485377536"],"editable_until_msecs":"1682180553000","is_edit_eligible":false,"edits_remaining":"5"},"is_translatable":false,"views":{"count":"1755","state":"EnabledWithCount"},"source":"<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>","legacy":{"bookmark_count":0,"bookmarked":false,"created_at":"Sat Apr 22 15:52:33 +0000 2023","conversation_id_str":"1649600354747572225","display_text_range":[13,143],"entities":{"user_mentions":[{"id_str":"886358633646350340","name":"LindyMan","screen_name":"PaulSkallas","indices":[0,12]}],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":16,"favorited":false,"full_text":"@PaulSkallas Clickabait = fast/cheap attention = forgotten just as quickly. 
What do you think happens to all the pop music of the same variety?","in_reply_to_screen_name":"PaulSkallas","in_reply_to_status_id_str":"1649600354747572225","in_reply_to_user_id_str":"886358633646350340","is_quote_status":false,"lang":"en","quote_count":0,"reply_count":1,"retweet_count":0,"retweeted":false,"user_id_str":"14031032","id_str":"1649803385485377536"},"quick_promote_eligibility":{"eligibility":"IneligibleUserUnauthorized"}}},"tweetDisplayType":"Tweet"},"clientEventInfo":{"details":{"conversationDetails":{"conversationSection":"HighQuality"},"timelinesDetails":{"controllerData":"DAACDAAEDAABCgABFSACDDADgAUKAAIAAAAAGADACAAAAAA="}}}}},{"entryId":"conversationthread-1649803385485377536-cursor-showmore-6525681801715054743","item":{"itemContent":{"itemType":"TimelineTimelineCursor","__typename":"TimelineTimelineCursor","value":"PAAAAPAtPBwcFoCAvtGE3KPlLRUCAAAYJmNvbnZlcnNhdGlvbnRocmVhZC0xNjQ5ODAzMzg1NDg1Mzc3NTM2IgAA","cursorType":"ShowMore","displayTreatment":{"actionText":"Show replies"}},"clientEventInfo":{"details":{"conversationDetails":{"conversationSection":"HighQuality"}}}}}],"displayType":"VerticalConversation","clientEventInfo":{"details":{"conversationDetails":{"conversationSection":"HighQuality"}}}}}