From 369616aeec344559661bd522b7107a81d2adf971 Mon Sep 17 00:00:00 2001
From: Alessio
Date: Thu, 22 Jun 2023 19:42:16 -0300
Subject: [PATCH] Implement parsing of whole inbox state

---
Review notes (text between "---" and the first diff is not part of the commit):
- ToDMTrove indexes the inbox's messages, chat rooms and users by ID, and
  panics if ParseSingleUser returns an error.
- TestParseInbox passes `id` to the "%d" failure messages and calls
  assert.Equal as (expected, actual), like the surrounding tests.  Line counts
  are unchanged, so the hunk headers and diffstat below still hold; the `index`
  blob hash of the test file is presumably stale after this hand edit.
- NOTE(review): ToDMTrove and the test live under pkg/scraper/, while
  dm_trove.go and inbox.json are created under scraper/.  The test opens
  "test_responses/dms/inbox.json" by relative path -- confirm the intended
  directory before applying.

 pkg/scraper/api_types_dms.go          | 21 ++++++++++++++++++
 pkg/scraper/api_types_dms_test.go     | 32 ++++++++++++++++++++++++---
 scraper/dm_trove.go                   | 25 +++++++++++++++++++++
 scraper/test_responses/dms/inbox.json |  1 +
 4 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 scraper/dm_trove.go
 create mode 100644 scraper/test_responses/dms/inbox.json

diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go
index c9920ef..cc9c048 100644
--- a/pkg/scraper/api_types_dms.go
+++ b/pkg/scraper/api_types_dms.go
@@ -55,3 +55,24 @@ type APIInbox struct {
 type APIDMResponse struct {
 	InboxInitialState APIInbox `json:"inbox_initial_state"`
 }
+
+func (r APIDMResponse) ToDMTrove() DMTrove {
+	ret := NewDMTrove()
+	for _, entry := range r.InboxInitialState.Entries {
+		result := ParseAPIDMMessage(entry.Message)
+		ret.Messages[result.ID] = result
+		// TODO: parse Tweet attachments
+	}
+	for _, room := range r.InboxInitialState.Conversations {
+		result := ParseAPIDMChatRoom(room)
+		ret.Rooms[result.ID] = result
+	}
+	for _, u := range r.InboxInitialState.Users {
+		result, err := ParseSingleUser(u)
+		if err != nil {
+			panic(err)
+		}
+		ret.TweetTrove.Users[result.ID] = result
+	}
+	return ret
+}
diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go
index 4c568b4..d424349 100644
--- a/pkg/scraper/api_types_dms_test.go
+++ b/pkg/scraper/api_types_dms_test.go
@@ -57,9 +57,8 @@ func TestParseAPIDMMessageWithReaction(t *testing.T) {
 func TestParseAPIDMConversation(t *testing.T) {
 	assert := assert.New(t)
 	data, err := os.ReadFile("test_responses/dms/dm_chat_room.json")
-	if err != nil {
-		panic(err)
-	}
+	require.NoError(t, err)
+
 	var api_room APIDMConversation
 	err = json.Unmarshal(data, &api_room)
 	require.NoError(t, err)
@@ -90,3 +89,30 @@ func TestParseAPIDMConversation(t *testing.T) {
 	assert.Equal(DMMessageID(1663623062195957773), p2.LastReadEventID)
 	assert.False(p2.IsChatSettingsValid)
 }
+
+func TestParseInbox(t *testing.T) {
+	assert := assert.New(t)
+	data, err := os.ReadFile("test_responses/dms/inbox.json")
+	require.NoError(t, err)
+
+	var inbox APIDMResponse
+	err = json.Unmarshal(data, &inbox)
+	require.NoError(t, err)
+
+	trove := inbox.ToDMTrove()
+
+	for _, id := range []DMMessageID{1663623062195957773, 1663623203644751885, 1665922180176044037, 1665936253483614212} {
+		m, is_ok := trove.Messages[id]
+		assert.True(is_ok, "Message with ID %d not in the trove!", id)
+		assert.Equal(id, m.ID)
+	}
+	for _, id := range []UserID{1458284524761075714, 1488963321701171204} {
+		u, is_ok := trove.TweetTrove.Users[id]
+		assert.True(is_ok, "User with ID %d not in the trove!", id)
+		assert.Equal(id, u.ID)
+	}
+	room_id := DMChatRoomID("1458284524761075714-1488963321701171204")
+	room, is_ok := trove.Rooms[room_id]
+	assert.True(is_ok)
+	assert.Equal(room_id, room.ID)
+}
diff --git a/scraper/dm_trove.go b/scraper/dm_trove.go
new file mode 100644
index 0000000..7bbe866
--- /dev/null
+++ b/scraper/dm_trove.go
@@ -0,0 +1,25 @@
+package scraper
+
+type DMTrove struct {
+	Rooms      map[DMChatRoomID]DMChatRoom
+	Messages   map[DMMessageID]DMMessage
+	TweetTrove TweetTrove
+}
+
+func NewDMTrove() DMTrove {
+	ret := DMTrove{}
+	ret.Rooms = make(map[DMChatRoomID]DMChatRoom)
+	ret.Messages = make(map[DMMessageID]DMMessage)
+	ret.TweetTrove = NewTweetTrove()
+	return ret
+}
+
+func (t1 *DMTrove) MergeWith(t2 DMTrove) {
+	for id, val := range t2.Rooms {
+		t1.Rooms[id] = val
+	}
+	for id, val := range t2.Messages {
+		t1.Messages[id] = val
+	}
+	t1.TweetTrove.MergeWith(t2.TweetTrove)
+}
diff --git a/scraper/test_responses/dms/inbox.json b/scraper/test_responses/dms/inbox.json
new file mode 100644
index 0000000..5aadfdf
--- /dev/null
+++ b/scraper/test_responses/dms/inbox.json
@@ -0,0 +1 @@
+{"inbox_initial_state":{"last_seen_event_id":"1663623203644751885","trusted_last_seen_event_id":"1663623203644751885","untrusted_last_seen_event_id":"0","cursor":"GRwmgICxgc2Pwp4uFoiAsPXFjMyeLiUCAAA","inbox_timelines":{"trusted":{"status":"AT_END","min_entry_id":"1665936253483614212"},"untrusted":{"status":"AT_END"},"untrusted_low_quality":{"status":"AT_END"}},"entries":[{"message":{"id":"1663623062195957773","time":"1685473621419","affects_sort":true,"request_id":"28239b30-ff1d-11ed-a0c0-670a6eac0c5e","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1663623062195957773","time":"1685473621000","recipient_id":"1458284524761075714","sender_id":"1488963321701171204","text":"Yes helo"},"message_reactions":[{"id":"1665914315742781440","time":"1686019898732","conversation_id":"1458284524761075714-1488963321701171204","message_id":"1663623062195957773","reaction_key":"funny","emoji_reaction":"😂","sender_id":"1458284524761075714"}]}},{"message":{"id":"1663623203644751885","time":"1685473655064","affects_sort":true,"request_id":"3c49d700-ff1d-11ed-8bb9-6b9772347ff6","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1663623203644751885","time":"1685473655000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Yeah i know who you are lol"}}},{"message":{"id":"1665922180176044037","time":"1686021773787","affects_sort":true,"request_id":"1686021775772","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1665922180176044037","time":"1686021773000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Yes?","reply_data":{"id":"1663623062195957773","time":"1685473621000","recipient_id":"1458284524761075714","sender_id":"1488963321701171204","text":"Yes 
helo"}}}},{"message":{"id":"1665936253483614212","time":"1686025129132","affects_sort":true,"request_id":"3c761eb0-0421-11ee-a411-33a998da3888","conversation_id":"1458284524761075714-1488963321701171204","message_data":{"id":"1665936253483614212","time":"1686025129000","recipient_id":"1488963321701171204","sender_id":"1458284524761075714","text":"Check this out\nhttps://t.co/rHeWGgNIZ1","entities":{"hashtags":[],"symbols":[],"user_mentions":[],"urls":[{"url":"https://t.co/rHeWGgNIZ1","expanded_url":"https://twitter.com/wispem_wantex/status/1665509126737129472","display_url":"twitter.com/wispem_wantex/…","indices":[15,38]}]},"attachment":{"tweet":{"id":"1665509126737129472","url":"https://t.co/rHeWGgNIZ1","display_url":"twitter.com/wispem_wantex/…","expanded_url":"https://twitter.com/wispem_wantex/status/1665509126737129472","indices":[15,38],"status":{"created_at":"Mon Jun 05 00:01:34 +0000 2023","id":1665509126737129500,"id_str":"1665509126737129472","full_text":"Btw, to the extent this has happened, it's partly thanks to the Golden One (@TheGloriousLion) who invented #fizeekfriday and the \"post physique\" rejoinder. 
Everyone should follow him if they don't already.\n\nSince I forgot last week, and since it's topical, here's a leg poast https://t.co/4rmddQ6yrp","truncated":false,"display_text_range":[0,276],"entities":{"hashtags":[{"text":"fizeekfriday","indices":[107,120]}],"symbols":[],"user_mentions":[{"screen_name":"TheGloriousLion","name":"The Golden One","id":1900293397,"id_str":"1900293397","indices":[76,92]}],"urls":[],"media":[{"id":1665509124572946400,"id_str":"1665509124572946432","indices":[277,300],"media_url":"http://pbs.twimg.com/media/Fx0TuXxXoAAJI4f.jpg","media_url_https":"https://pbs.twimg.com/media/Fx0TuXxXoAAJI4f.jpg","url":"https://t.co/4rmddQ6yrp","display_url":"pic.twitter.com/4rmddQ6yrp","expanded_url":"https://twitter.com/wispem_wantex/status/1665509126737129472/photo/1","type":"photo","original_info":{"width":1455,"height":2048,"focus_rects":[{"x":0,"y":258,"h":815,"w":1455},{"x":0,"y":0,"h":1455,"w":1455},{"x":0,"y":0,"h":1659,"w":1455},{"x":51,"y":0,"h":2048,"w":1024},{"x":0,"y":0,"h":2048,"w":1455}]},"sizes":{"medium":{"w":853,"h":1200,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"small":{"w":483,"h":680,"resize":"fit"},"large":{"w":1455,"h":2048,"resize":"fit"}},"features":{"medium":{"faces":[]},"small":{"faces":[]},"orig":{"faces":[]},"large":{"faces":[]}}}]},"extended_entities":{"media":[{"id":1665509124572946400,"id_str":"1665509124572946432","indices":[277,300],"media_url":"http://pbs.twimg.com/media/Fx0TuXxXoAAJI4f.jpg","media_url_https":"https://pbs.twimg.com/media/Fx0TuXxXoAAJI4f.jpg","url":"https://t.co/4rmddQ6yrp","display_url":"pic.twitter.com/4rmddQ6yrp","expanded_url":"https://twitter.com/wispem_wantex/status/1665509126737129472/photo/1","type":"photo","original_info":{"width":1455,"height":2048,"focus_rects":[{"x":0,"y":258,"h":815,"w":1455},{"x":0,"y":0,"h":1455,"w":1455},{"x":0,"y":0,"h":1659,"w":1455},{"x":51,"y":0,"h":2048,"w":1024},{"x":0,"y":0,"h":2048,"w":1455}]},"sizes":{"medium":{"w":853,"h":1200,"resize":"fit"},"t
humb":{"w":150,"h":150,"resize":"crop"},"small":{"w":483,"h":680,"resize":"fit"},"large":{"w":1455,"h":2048,"resize":"fit"}},"features":{"medium":{"faces":[]},"small":{"faces":[]},"orig":{"faces":[]},"large":{"faces":[]}},"media_key":"3_1665509124572946432","ext_media_color":{"palette":[{"rgb":{"red":137,"green":134,"blue":126},"percentage":48.62},{"rgb":{"red":68,"green":62,"blue":57},"percentage":35.45},{"rgb":{"red":179,"green":206,"blue":216},"percentage":4.38},{"rgb":{"red":191,"green":163,"blue":134},"percentage":4.05},{"rgb":{"red":76,"green":114,"blue":133},"percentage":4}]},"ext_alt_text":null,"ext":{"mediaStats":{"r":"Missing","ttl":-1},"mediaColor":{"r":{"ok":{"palette":[{"rgb":{"red":137,"green":134,"blue":126},"percentage":48.62},{"rgb":{"red":68,"green":62,"blue":57},"percentage":35.45},{"rgb":{"red":179,"green":206,"blue":216},"percentage":4.38},{"rgb":{"red":191,"green":163,"blue":134},"percentage":4.05},{"rgb":{"red":76,"green":114,"blue":133},"percentage":4}]}},"ttl":-1},"altText":{"r":"Missing","ttl":-1}}}]},"source":"Twitter for Android","in_reply_to_status_id":1665505986184900600,"in_reply_to_status_id_str":"1665505986184900611","in_reply_to_user_id":1458284524761075700,"in_reply_to_user_id_str":"1458284524761075714","in_reply_to_screen_name":"wispem_wantex","user":{"id":1458284524761075700,"id_str":"1458284524761075714","name":"wispem-wantex","screen_name":"wispem_wantex","location":null,"description":"~wispem-wantex\n\nwispem-wantex.n1gu0@slmail.me 4 
https://t.co/A0AYijhuZL","url":"https://t.co/7nDTwkyzRJ","entities":{"url":{"urls":[{"url":"https://t.co/7nDTwkyzRJ","expanded_url":"https://offline-twitter.com/","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[{"url":"https://t.co/A0AYijhuZL","expanded_url":"http://archive.org","display_url":"archive.org","indices":[48,71]}]}},"protected":false,"followers_count":367,"fast_followers_count":0,"normal_followers_count":367,"friends_count":119,"listed_count":4,"created_at":"Wed Nov 10 04:05:16 +0000 2021","favourites_count":10858,"utc_offset":null,"time_zone":null,"geo_enabled":false,"verified":false,"statuses_count":4505,"media_count":308,"lang":null,"contributors_enabled":false,"is_translator":false,"is_translation_enabled":false,"profile_background_color":"F5F8FA","profile_background_image_url":null,"profile_background_image_url_https":null,"profile_background_tile":false,"profile_image_url":"http://pbs.twimg.com/profile_images/1462880679687954433/dXJN4Bo4_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/1462880679687954433/dXJN4Bo4_normal.jpg","profile_link_color":"1DA1F2","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"default_profile":true,"default_profile_image":false,"pinned_tweet_ids":[1652383573909840000],"pinned_tweet_ids_str":["1652383573909839873"],"has_custom_timelines":false,"can_dm":null,"can_media_tag":false,"following":true,"follow_request_sent":false,"notifications":false,"muting":null,"blocking":false,"blocked_by":false,"want_retweets":true,"advertiser_account_type":"none","advertiser_account_service_levels":[],"business_profile_state":"none","translator_type":"none","withheld_in_countries":[],"followed_by":false,"ext_has_nft_avatar":false,"ext":{"superFollowMetadata":{"r":{"ok":{"superFollowEligible":false,"superFollowing":false,"superFollowedBy":false,"exclusiveTweetFollowing":false,"privateSuper
Following":false}},"ttl":-1},"hasNftAvatar":{"r":{"ok":false},"ttl":-1},"highlightedLabel":{"r":{"ok":{}},"ttl":-1}},"require_some_consent":false},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":7,"reply_count":0,"quote_count":0,"favorited":false,"retweeted":false,"possibly_sensitive":false,"possibly_sensitive_editable":true,"lang":"en","supplemental_language":null,"self_thread":{"id":1665505972415000600,"id_str":"1665505972415000577"},"ext":{"superFollowMetadata":{"r":{"ok":{}},"ttl":-1}}}}}}}}],"users":{"1458284524761075714":{"id":1458284524761075700,"id_str":"1458284524761075714","name":"wispem-wantex","screen_name":"wispem_wantex","profile_image_url":"http://pbs.twimg.com/profile_images/1462880679687954433/dXJN4Bo4_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/1462880679687954433/dXJN4Bo4_normal.jpg","following":true,"follow_request_sent":false,"description":"~wispem-wantex\n\nwispem-wantex.n1gu0@slmail.me 4 https://t.co/A0AYijhuZL","entities":{"url":{"urls":[{"url":"https://t.co/7nDTwkyzRJ","expanded_url":"https://offline-twitter.com/","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[{"url":"https://t.co/A0AYijhuZL","expanded_url":"http://archive.org","display_url":"archive.org","indices":[48,71]}]}},"verified":false,"is_blue_verified":false,"protected":false,"blocking":false,"can_media_tag":false,"created_at":"Wed Nov 10 04:05:16 +0000 2021","friends_count":119,"followers_count":367,"ext_has_nft_avatar":false},"1488963321701171204":{"id":1488963321701171200,"id_str":"1488963321701171204","name":"Offline Twatter","screen_name":"Offline_Twatter","profile_image_url":"http://pbs.twimg.com/profile_images/1507883049853210626/TytFbk_3_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/1507883049853210626/TytFbk_3_normal.jpg","following":false,"follow_request_sent":false,"description":"Offline Twitter is an 
open source twitter client and tweet-archiving app all in one. Try it out!\n\nSource code: https://t.co/2PMumKSxFO","entities":{"url":{"urls":[{"url":"https://t.co/ZOW47ob1CE","expanded_url":"https://offline-twitter.com","display_url":"offline-twitter.com","indices":[0,23]}]},"description":{"urls":[{"url":"https://t.co/2PMumKSxFO","expanded_url":"https://gitlab.com/offline-twitter","display_url":"gitlab.com/offline-twitter","indices":[112,135]}]}},"verified":false,"is_blue_verified":false,"protected":false,"blocking":false,"can_media_tag":true,"created_at":"Wed Feb 02 19:52:02 +0000 2022","friends_count":4,"followers_count":2,"ext_has_nft_avatar":false}},"conversations":{"1458284524761075714-1488963321701171204":{"conversation_id":"1458284524761075714-1488963321701171204","type":"ONE_TO_ONE","sort_event_id":"1665936253483614212","sort_timestamp":"1686025129086","participants":[{"user_id":"1458284524761075714","last_read_event_id":"1665936253483614212"},{"user_id":"1488963321701171204","last_read_event_id":"1663623062195957773"}],"nsfw":false,"notifications_disabled":false,"mention_notifications_disabled":false,"last_read_event_id":"1663623062195957773","read_only":false,"trusted":true,"low_quality":false,"muted":false,"status":"AT_END","min_entry_id":"1663623062195957773","max_entry_id":"1665936253483614212"}}}}