diff --git a/cmd/tests.sh b/cmd/tests.sh
index 3c0d252..342c70d 100755
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -377,6 +377,8 @@ tw fetch_inbox
 test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1"
 test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5"
 
+# Test fetch a DM conversation
+tw fetch_dm "1458284524761075714-1488963321701171204"
 
 # TODO: Maybe this file should be broken up into multiple test scripts
 
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index b3619ce..45b7241 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -161,6 +161,8 @@ func main() {
 		start_webserver(*addr)
 	case "fetch_inbox":
 		fetch_inbox(*how_many)
+	case "fetch_dm":
+		fetch_dm(target, *how_many)
 	default:
 		die(fmt.Sprintf("Invalid operation: %s", operation), true, 3)
 	}
@@ -403,3 +405,16 @@ func fetch_inbox(how_many int) {
 	profile.SaveDMTrove(trove)
 	happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)))
 }
+
+// Scrape messages from the DM chat room with the given ID, starting from the newest message,
+// and save the result to the profile. Panics if `profile.GetChatRoom` returns an error.
+func fetch_dm(id string, how_many int) {
+	room, err := profile.GetChatRoom(scraper.DMChatRoomID(id))
+	if err != nil {
+		panic(err)
+	}
+	max_id := scraper.DMMessageID(^uint(0) >> 1) // Max integer, i.e., no upper bound on message ID
+	trove := scraper.GetConversation(room.ID, max_id, how_many)
+	profile.SaveDMTrove(trove)
+	happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)))
+}
diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go
index 2d13b37..ccf30db 100644
--- a/pkg/scraper/api_types_dms.go
+++ b/pkg/scraper/api_types_dms.go
@@ -67,8 +67,9 @@ type APIInbox struct {
 }
 
 type APIDMResponse struct {
-	InboxInitialState APIInbox `json:"inbox_initial_state"`
-	InboxTimeline     APIInbox `json:"inbox_timeline"`
+	InboxInitialState    APIInbox `json:"inbox_initial_state"`
+	InboxTimeline        APIInbox `json:"inbox_timeline"`
+	ConversationTimeline APIInbox `json:"conversation_timeline"`
 }
 
 func (r APIInbox) ToDMTrove() DMTrove {
@@ -212,3 +213,64 @@ func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
 	err = api.do_http(url.String(), "", &result)
 	return result.InboxTimeline, err
 }
+
+// Request one page of the DM conversation `id` from the `dm/conversation/<id>.json` endpoint,
+// sending `max_id` as the `max_id` query parameter. Returns the `conversation_timeline`
+// portion of the response, along with any error from `do_http`.
+func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox, error) {
+	url, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + string(id) + ".json")
+	if err != nil {
+		panic(err)
+	}
+	query := url.Query()
+	query.Add("max_id", fmt.Sprint(max_id))
+	query.Add("context", "FETCH_DM_CONVERSATION_HISTORY")
+	query.Add("include_profile_interstitial_type", "1")
+	query.Add("include_blocking", "1")
+	query.Add("include_blocked_by", "1")
+	query.Add("include_followed_by", "1")
+	query.Add("include_want_retweets", "1")
+	query.Add("include_mute_edge", "1")
+	query.Add("include_can_dm", "1")
+	query.Add("include_can_media_tag", "1")
+	query.Add("include_ext_has_nft_avatar", "1")
+	query.Add("include_ext_is_blue_verified", "1")
+	query.Add("include_ext_verified_type", "1")
+	query.Add("include_ext_profile_image_shape", "1")
+	query.Add("skip_status", "1")
+	query.Add("dm_secret_conversations_enabled", "false")
+	query.Add("krs_registration_enabled", "true")
+	query.Add("cards_platform", "Web-12")
+	query.Add("include_cards", "1")
+	query.Add("include_ext_alt_text", "true")
+	query.Add("include_ext_limited_action_results", "true")
+	query.Add("include_quote_count", "true")
+	query.Add("include_reply_count", "1")
+	query.Add("tweet_mode", "extended")
+	query.Add("include_ext_views", "true")
+	query.Add("dm_users", "false")
+	query.Add("include_groups", "true")
+	query.Add("include_inbox_timelines", "true")
+	query.Add("include_ext_media_color", "true")
+	query.Add("supports_reactions", "true")
+	query.Add("include_conversation_info", "true")
+	query.Add("ext", strings.Join([]string{
+		"mediaColor",
+		"altText",
+		"mediaStats",
+		"highlightedLabel",
+		"hasNftAvatar",
+		"voiceInfo",
+		"birdwatchPivot",
+		"enrichments",
+		"superFollowMetadata",
+		"unmentionInfo",
+		"editControl",
+		"vibe",
+	}, ","))
+	url.RawQuery = query.Encode()
+
+	var result APIDMResponse
+	err = api.do_http(url.String(), "", &result)
+	return result.ConversationTimeline, err
+}
diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go
index 77eae48..96a3fc2 100644
--- a/pkg/scraper/api_types_dms_test.go
+++ b/pkg/scraper/api_types_dms_test.go
@@ -116,3 +116,37 @@ func TestParseInbox(t *testing.T) {
 	assert.True(is_ok)
 	assert.Equal(room.ID, room_id)
 }
+
+func TestParseDMRoomResponse(t *testing.T) {
+	assert := assert.New(t)
+	data, err := os.ReadFile("test_responses/dms/dm_conversation_response.json")
+	require.NoError(t, err)
+
+	var inbox APIDMResponse
+	err = json.Unmarshal(data, &inbox)
+	require.NoError(t, err)
+
+	trove := inbox.ConversationTimeline.ToDMTrove()
+
+	for _, id := range []DMMessageID{
+		1663623062195957773,
+		1663623203644751885,
+		1665922180176044037,
+		1665936253483614212,
+		1726009944393372005,
+	} {
+		m, is_ok := trove.Messages[id]
+		assert.True(is_ok, "Message with ID %d not in the trove!", id)
+		assert.Equal(m.ID, id)
+	}
+	for _, id := range []UserID{1458284524761075714, 1488963321701171204} {
+		u, is_ok := trove.TweetTrove.Users[id]
+		assert.True(is_ok, "User with ID %d not in the trove!", id)
+		assert.Equal(u.ID, id)
+	}
+	room_id := DMChatRoomID("1458284524761075714-1488963321701171204")
+	room, is_ok := trove.Rooms[room_id]
+	assert.True(is_ok)
+	assert.Equal(room.ID, room_id)
+	assert.Equal(trove.GetOldestMessage(room_id), DMMessageID(1663623062195957773))
+}
diff --git a/pkg/scraper/dm_trove.go b/pkg/scraper/dm_trove.go
index b6a1562..e9b1845 100644
--- a/pkg/scraper/dm_trove.go
+++ b/pkg/scraper/dm_trove.go
@@ -28,6 +28,18 @@ func (t1 *DMTrove) MergeWith(t2 DMTrove) {
 	t1.TweetTrove.MergeWith(t2.TweetTrove)
 }
 
+// Returns the smallest message ID in the trove that belongs to the given chat room.
+// If the trove has no messages in that room, the max integer value is returned as a sentinel.
+func (t DMTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
+	oldest := DMMessageID(^uint(0) >> 1) // Max integer
+	for _, m := range t.Messages {
+		if m.ID < oldest && m.DMChatRoomID == id {
+			oldest = m.ID
+		}
+	}
+	return oldest
+}
+
 // Returns a DMTrove and the cursor for the next update
 func GetInbox(how_many int) (DMTrove, string) {
 	if !the_api.IsAuthenticated {
@@ -53,3 +65,38 @@ func GetInbox(how_many int) (DMTrove, string) {
 
 	return trove, cursor
 }
+
+// Fetches messages from the given chat room, beginning at `max_id` and requesting further pages
+// (using the oldest message seen so far as the next `max_id`) until the trove holds at least
+// `how_many` messages, the API reports status "AT_END", or a page yields nothing older.
+func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) DMTrove {
+	if !the_api.IsAuthenticated {
+		log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
+	}
+	dm_response, err := the_api.GetDMConversation(id, max_id)
+	if err != nil {
+		panic(err)
+	}
+
+	trove := dm_response.ToDMTrove()
+	oldest := trove.GetOldestMessage(id)
+	for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
+		dm_response, err = the_api.GetDMConversation(id, oldest)
+		if err != nil {
+			panic(err)
+		}
+		next_trove := dm_response.ToDMTrove()
+		trove.MergeWith(next_trove)
+
+		// Stop if this page contained nothing older than what we already have. An empty page
+		// makes GetOldestMessage return its max-integer sentinel, which would otherwise restart
+		// paging from the newest message and loop forever.
+		next_oldest := next_trove.GetOldestMessage(id)
+		if next_oldest >= oldest {
+			break
+		}
+		oldest = next_oldest
+	}
+
+	return trove
+}
diff --git a/pkg/scraper/test_responses/dms/dm_conversation_response.json b/pkg/scraper/test_responses/dms/dm_conversation_response.json
new file mode 100644
index 0000000..f16d38e
--- /dev/null
+++ b/pkg/scraper/test_responses/dms/dm_conversation_response.json
@@ -0,0 +1 @@
+{"conversation_timeline":{"status":"AT_END","min_entry_id":"1663623062195957773","max_entry_id":"1726009944393372005","entries":[{"message":{"id":"1726009944393372005","time":"1700347813147","request_id":"d545b130-8664-11ee-a1bb-51758e4ac1af","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1726009944393372005","time":"1700347813000","recipient_id":"1458284524761075714","sender_id":"1488963321701171204","text":"Bruh"}}},{"message":{"id":"1665936253483614212","time":"1686025129132","request_id":"3c761eb0-0421-11ee-a411-33a998da3888","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1665936253483614212","time":"1686025129000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Check this out\nhttps:\/\/t.co\/rHeWGgNIZ1","entities":{"hashtags":[],"symbols":[],"user_mentions":[],"urls":[{"url":"https:\/\/t.co\/rHeWGgNIZ1","expanded_url":"https:\/\/twitter.com\/wispem_wantex\/status\/1665509126737129472","display_url":"twitter.com\/wispem_wantex\/\u2026","indices":[15,38]}]},"attachment":{"tweet":{"id":"1665509126737129472","url":"https:\/\/t.co\/rHeWGgNIZ1","display_url":"twitter.com\/wispem_wantex\/\u2026","expanded_url":"https:\/\/twitter.com\/wispem_wantex\/status\/1665509126737129472","indices":[15,38],"status":{"created_at":"Mon Jun 05 00:01:34 +0000 2023","id":1665509126737129472,"id_str":"1665509126737129472","full_text":"Btw, to the extent this has happened, it's partly thanks to the Golden One (@TheGloriousLion) who invented #fizeekfriday and the \"post physique\" rejoinder. 
Everyone should follow him if they don't already.\n\nSince I forgot last week, and since it's topical, here's a leg poast https:\/\/t.co\/4rmddQ6yrp","truncated":false,"display_text_range":[0,276],"entities":{"hashtags":[{"text":"fizeekfriday","indices":[107,120]}],"symbols":[],"user_mentions":[{"screen_name":"TheGloriousLion","name":"The Golden One","id":1900293397,"id_str":"1900293397","indices":[76,92]}],"urls":[],"media":[{"id":1665509124572946432,"id_str":"1665509124572946432","indices":[277,300],"media_url":"http:\/\/pbs.twimg.com\/media\/Fx0TuXxXoAAJI4f.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Fx0TuXxXoAAJI4f.jpg","url":"https:\/\/t.co\/4rmddQ6yrp","display_url":"pic.twitter.com\/4rmddQ6yrp","expanded_url":"https:\/\/twitter.com\/wispem_wantex\/status\/1665509126737129472\/photo\/1","type":"photo","original_info":{"width":1455,"height":2048,"focus_rects":[{"x":0,"y":258,"h":815,"w":1455},{"x":0,"y":0,"h":1455,"w":1455},{"x":0,"y":0,"h":1659,"w":1455},{"x":51,"y":0,"h":2048,"w":1024},{"x":0,"y":0,"h":2048,"w":1455}]},"sizes":{"medium":{"w":853,"h":1200,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":483,"h":680,"resize":"fit"},"large":{"w":1455,"h":2048,"resize":"fit"}},"features":{"medium":{"faces":[]},"small":{"faces":[]},"orig":{"faces":[]},"large":{"faces":[]}}}]},"extended_entities":{"media":[{"id":1665509124572946432,"id_str":"1665509124572946432","indices":[277,300],"media_url":"http:\/\/pbs.twimg.com\/media\/Fx0TuXxXoAAJI4f.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/Fx0TuXxXoAAJI4f.jpg","url":"https:\/\/t.co\/4rmddQ6yrp","display_url":"pic.twitter.com\/4rmddQ6yrp","expanded_url":"https:\/\/twitter.com\/wispem_wantex\/status\/1665509126737129472\/photo\/1","type":"photo","original_info":{"width":1455,"height":2048,"focus_rects":[{"x":0,"y":258,"h":815,"w":1455},{"x":0,"y":0,"h":1455,"w":1455},{"x":0,"y":0,"h":1659,"w":1455},{"x":51,"y":0,"h":2048,"w":1024},{"x":0,"y":0,"h":2048,"w":1455}]},"sizes":{"med
ium":{"w":853,"h":1200,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":483,"h":680,"resize":"fit"},"large":{"w":1455,"h":2048,"resize":"fit"}},"features":{"medium":{"faces":[]},"small":{"faces":[]},"orig":{"faces":[]},"large":{"faces":[]}},"media_key":"3_1665509124572946432","ext_media_color":{"palette":[{"rgb":{"red":137,"green":134,"blue":126},"percentage":48.62},{"rgb":{"red":68,"green":62,"blue":57},"percentage":35.45},{"rgb":{"red":179,"green":206,"blue":216},"percentage":4.38},{"rgb":{"red":191,"green":163,"blue":134},"percentage":4.05},{"rgb":{"red":76,"green":114,"blue":133},"percentage":4.0}]},"ext_alt_text":null,"ext":{"mediaStats":{"r":"Missing","ttl":-1},"mediaColor":{"r":{"ok":{"palette":[{"rgb":{"red":137,"green":134,"blue":126},"percentage":48.62},{"rgb":{"red":68,"green":62,"blue":57},"percentage":35.45},{"rgb":{"red":179,"green":206,"blue":216},"percentage":4.38},{"rgb":{"red":191,"green":163,"blue":134},"percentage":4.05},{"rgb":{"red":76,"green":114,"blue":133},"percentage":4.0}]}},"ttl":-1},"altText":{"r":"Missing","ttl":-1}}}]},"source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","in_reply_to_status_id":1665505986184900611,"in_reply_to_status_id_str":"1665505986184900611","in_reply_to_user_id":1458284524761075714,"in_reply_to_user_id_str":"1458284524761075714","in_reply_to_screen_name":"wispem_wantex","user":{"id":1458284524761075714,"id_str":"1458284524761075714","name":"wispem-wantex","screen_name":"wispem_wantex","location":"on my computer","description":"~wispem-wantex\n\nCurrently looking for work (DMs 
open)","url":"https:\/\/t.co\/7nDTwkz7Hh","entities":{"url":{"urls":[{"url":"https:\/\/t.co\/7nDTwkz7Hh","expanded_url":"https:\/\/offline-twitter.com\/","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[]}},"protected":false,"followers_count":495,"fast_followers_count":0,"normal_followers_count":495,"friends_count":144,"listed_count":11,"created_at":"Wed Nov 10 04:05:16 +0000 2021","favourites_count":17655,"utc_offset":null,"time_zone":null,"geo_enabled":false,"verified":false,"statuses_count":6619,"media_count":418,"lang":null,"contributors_enabled":false,"is_translator":false,"is_translation_enabled":false,"profile_background_color":"F5F8FA","profile_background_image_url":null,"profile_background_image_url_https":null,"profile_background_tile":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1724933823144620032\/sYTzWQy2_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1724933823144620032\/sYTzWQy2_normal.jpg","profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"default_profile":true,"default_profile_image":false,"pinned_tweet_ids":[1723014479200547105],"pinned_tweet_ids_str":["1723014479200547105"],"has_custom_timelines":false,"can_dm":null,"can_media_tag":false,"following":true,"follow_request_sent":false,"notifications":false,"muting":null,"blocking":false,"blocked_by":false,"want_retweets":true,"advertiser_account_type":"none","advertiser_account_service_levels":[],"business_profile_state":"none","translator_type":"none","withheld_in_countries":[],"followed_by":false,"ext_has_nft_avatar":false,"ext":{"superFollowMetadata":{"r":{"ok":{"superFollowEligible":false,"superFollowing":false,"superFollowedBy":false,"exclusiveTweetFollowing":false,"privateSuperFollowing":false}},"ttl":-1},"hasNftAvatar":{"r":{"ok":false},"ttl":-1},"highlightedLabel":{"r":{"ok":{}},"ttl":-
1}},"require_some_consent":false},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":7,"reply_count":0,"quote_count":0,"favorited":false,"retweeted":false,"possibly_sensitive":false,"possibly_sensitive_editable":true,"lang":"en","supplemental_language":null,"self_thread":{"id":1665505972415000577,"id_str":"1665505972415000577"},"ext":{"superFollowMetadata":{"r":{"ok":{}},"ttl":-1}}}}}},"message_reactions":[{"id":"1726010147888316416","time":"1700347861643","conversation_id":"1458284524761075714-1488963321701171204","message_id":"1665936253483614212","reaction_key":"emoji","emoji_reaction":"\ud83c\udf41","sender_id":"1488963321701171204"}]}},{"message":{"id":"1665922180176044037","time":"1686021773787","request_id":"1686021775772","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1665922180176044037","time":"1686021773000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Yes?","reply_data":{"id":"1663623062195957773","time":"1685473621000","recipient_id":"1458284524761075714","sender_id":"1488963321701171204","text":"Yes helo"}}}},{"message":{"id":"1663623203644751885","time":"1685473655064","request_id":"3c49d700-ff1d-11ed-8bb9-6b9772347ff6","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1663623203644751885","time":"1685473655000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Yeah i know who you are lol"}}},{"message":{"id":"1663623062195957773","time":"1685473621419","request_id":"28239b30-ff1d-11ed-a0c0-670a6eac0c5e","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1663623062195957773","time":"1685473621000","recipient_id":"1458284524761075714","sender_id":"1488963321701171204","text":"Yes 
helo"},"message_reactions":[{"id":"1665914315742781440","time":"1686019898732","conversation_id":"1458284524761075714-1488963321701171204","message_id":"1663623062195957773","reaction_key":"funny","emoji_reaction":"\ud83d\ude02","sender_id":"1458284524761075714"}]}}],"users":{"1488963321701171204":{"id":1488963321701171204,"id_str":"1488963321701171204","name":"Offline Twatter","screen_name":"Offline_Twatter","location":null,"description":"Offline Twitter is an open source twitter client and tweet-archiving app all in one. Try it out!\n\nSource code: https:\/\/t.co\/2PMumKSxFO","url":"https:\/\/t.co\/ZOW47ob1CE","entities":{"url":{"urls":[{"url":"https:\/\/t.co\/ZOW47ob1CE","expanded_url":"https:\/\/offline-twitter.com","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[{"url":"https:\/\/t.co\/2PMumKSxFO","expanded_url":"https:\/\/gitlab.com\/offline-twitter","display_url":"gitlab.com\/offline-twitter","indices":[112,135]}]}},"protected":false,"followers_count":4,"friends_count":4,"listed_count":0,"created_at":"Wed Feb 02 19:52:02 +0000 
2022","favourites_count":5,"utc_offset":null,"time_zone":null,"geo_enabled":false,"verified":false,"statuses_count":5,"lang":null,"contributors_enabled":false,"is_translator":false,"is_translation_enabled":false,"profile_background_color":"F5F8FA","profile_background_image_url":null,"profile_background_image_url_https":null,"profile_background_tile":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1507883049853210626\/TytFbk_3_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1507883049853210626\/TytFbk_3_normal.jpg","profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"default_profile":true,"default_profile_image":false,"can_dm":null,"can_secret_dm":null,"can_media_tag":true,"following":false,"follow_request_sent":false,"notifications":false,"blocking":false,"subscribed_by":false,"blocked_by":false,"want_retweets":false,"business_profile_state":"none","translator_type":"none","withheld_in_countries":[],"followed_by":false},"1458284524761075714":{"id":1458284524761075714,"id_str":"1458284524761075714","name":"wispem-wantex","screen_name":"wispem_wantex","location":"on my computer","description":"~wispem-wantex\n\nCurrently looking for work (DMs open)","url":"https:\/\/t.co\/7nDTwkz7Hh","entities":{"url":{"urls":[{"url":"https:\/\/t.co\/7nDTwkz7Hh","expanded_url":"https:\/\/offline-twitter.com\/","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[]}},"protected":false,"followers_count":495,"friends_count":144,"listed_count":11,"created_at":"Wed Nov 10 04:05:16 +0000 
2021","favourites_count":17655,"utc_offset":null,"time_zone":null,"geo_enabled":false,"verified":false,"statuses_count":6619,"lang":null,"contributors_enabled":false,"is_translator":false,"is_translation_enabled":false,"profile_background_color":"F5F8FA","profile_background_image_url":null,"profile_background_image_url_https":null,"profile_background_tile":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1724933823144620032\/sYTzWQy2_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1724933823144620032\/sYTzWQy2_normal.jpg","profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"default_profile":true,"default_profile_image":false,"can_dm":null,"can_secret_dm":null,"can_media_tag":false,"following":true,"follow_request_sent":false,"notifications":false,"blocking":false,"subscribed_by":false,"blocked_by":false,"want_retweets":true,"business_profile_state":"none","translator_type":"none","withheld_in_countries":[],"followed_by":false}},"conversations":{"1458284524761075714-1488963321701171204":{"conversation_id":"1458284524761075714-1488963321701171204","type":"ONE_TO_ONE","sort_event_id":"1726010147888316416","sort_timestamp":"1700347861631","participants":[{"user_id":"1458284524761075714","last_read_event_id":"1726010147888316416"},{"user_id":"1488963321701171204","last_read_event_id":"1726010147888316416"}],"nsfw":false,"notifications_disabled":false,"mention_notifications_disabled":false,"last_read_event_id":"1726010147888316416","read_only":false,"trusted":true,"low_quality":false,"muted":false}}}}