From c6d0bbce65c640b057601fda6d1e5882231791bf Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 19 Nov 2023 13:51:16 -0800 Subject: [PATCH] Add polling for DM updates while webserver is running --- internal/webserver/stopwatch.go | 49 ++++++++ pkg/scraper/api_types_dms.go | 107 +++++++++++++++++- pkg/scraper/api_types_dms_test.go | 21 ++++ pkg/scraper/dm_trove.go | 13 +++ .../dms/user_updates_simulated.json | 1 + 5 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 pkg/scraper/test_responses/dms/user_updates_simulated.json diff --git a/internal/webserver/stopwatch.go b/internal/webserver/stopwatch.go index 19ca27d..c2a7e8d 100644 --- a/internal/webserver/stopwatch.go +++ b/internal/webserver/stopwatch.go @@ -75,6 +75,42 @@ func (app *Application) background_user_likes_scrape() { fmt.Println("Scraping succeeded.") } +var inbox_cursor string = "" + +func (app *Application) background_dm_polling_scrape() { + // Avoid crashing the thread if a scrape fails + defer func() { + if r := recover(); r != nil { + // TODO + fmt.Println("Background Home Timeline thread: panicked!") + if err, ok := r.(error); ok { + fmt.Println(err.Error()) + } else { + fmt.Println(r) + } + } + }() + + fmt.Println("Starting user DMs scrape...") + + // Do nothing if scraping is currently disabled + if app.IsScrapingDisabled { + fmt.Println("Skipping user DMs scrape!") + return + } + + fmt.Println("Scraping user DMs...") + var trove scraper.DMTrove + if inbox_cursor == "" { + trove, inbox_cursor = scraper.GetInbox(0) + } else { + trove, inbox_cursor = scraper.PollInboxUpdates(inbox_cursor) + } + fmt.Println("Saving DM results...") + app.Profile.SaveDMTrove(trove) + fmt.Println("Scraping DMs succeeded.") +} + func (app *Application) start_background() { fmt.Println("Starting background") @@ -107,4 +143,17 @@ func (app *Application) start_background() { app.background_user_likes_scrape() } }() + + // Scrape inbox DMs every 10 seconds + go func() { + time.Sleep(5 * time.Second) + app.background_dm_polling_scrape() + + interval := 10 * time.Second + timer := time.NewTicker(interval) + defer timer.Stop() + for range timer.C { + app.background_dm_polling_scrape() + } + }() } diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go index ccf30db..9e5458a 100644 --- a/pkg/scraper/api_types_dms.go +++ b/pkg/scraper/api_types_dms.go @@ -7,10 +7,11 @@ import ( ) type APIDMReaction struct { - ID int `json:"id,string"` - Time int `json:"time,string"` - SenderID int `json:"sender_id,string"` - Emoji string `json:"emoji_reaction"` + ID int `json:"id,string"` + Time int `json:"time,string"` + SenderID int `json:"sender_id,string"` + Emoji string `json:"emoji_reaction"` + MessageID int `json:"message_id,string"` } type APIDMMessage struct { @@ -60,7 +61,37 @@ type APIInbox struct { } `json:"trusted"` } `json:"inbox_timelines"` Entries []struct { - Message APIDMMessage `json:"message"` + Message APIDMMessage `json:"message"` + ReactionCreate APIDMReaction `json:"reaction_create"` + JoinConversation struct { + ID int `json:"id,string"` + ConversationID string `json:"conversation_id"` + SenderID int `json:"sender_id,string"` + Time int `json:"time,string"` + Participants []struct { + UserID int `json:"user_id,string"` + } `json:"participants"` + } `json:"join_conversation"` + TrustConversation struct { + ID int `json:"id,string"` + ConversationID string `json:"conversation_id"` + Reason string `json:"reason"` + Time int `json:"time,string"` + } `json:"trust_conversation"` + ParticipantsLeave struct { + ID int `json:"id,string"` + ConversationID string `json:"conversation_id"` + Time int `json:"time,string"` + Participants []struct { + UserID int `json:"user_id,string"` + } `json:"participants"` + } `json:"participants_leave"` + ConversationRead struct { + ID int `json:"id,string"` + Time int `json:"time,string"` + ConversationID string `json:"conversation_id"` + LastReadEventID int `json:"last_read_event_id,string"` + } `json:"conversation_read"` } `json:"entries"` Users map[string]APIUser `json:"users"` Conversations map[string]APIDMConversation `json:"conversations"` @@ -70,13 +101,30 @@ type APIDMResponse struct { InboxInitialState APIInbox `json:"inbox_initial_state"` InboxTimeline APIInbox `json:"inbox_timeline"` ConversationTimeline APIInbox `json:"conversation_timeline"` + UserEvents APIInbox `json:"user_events"` } func (r APIInbox) ToDMTrove() DMTrove { ret := NewDMTrove() for _, entry := range r.Entries { + if entry.JoinConversation.ID != 0 || entry.TrustConversation.ID != 0 || + entry.ParticipantsLeave.ID != 0 || entry.ConversationRead.ID != 0 { + // TODO: message invitations + // TODO: people join/leave the chat + // TODO: updating read/unread indicators + continue + } + if entry.ReactionCreate.ID != 0 { + // Convert it into a Message + entry.Message.ID = entry.ReactionCreate.MessageID + entry.Message.MessageReactions = []APIDMReaction{entry.ReactionCreate} + } result := ParseAPIDMMessage(entry.Message) + if _, is_ok := ret.Messages[result.ID]; is_ok { + // No clobbering + panic("Already in the trove: " + fmt.Sprint(result.ID)) + } ret.Messages[result.ID] = result // TODO: parse Tweet attachments } @@ -271,3 +319,52 @@ func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox err = api.do_http(url.String(), "", &result) return result.ConversationTimeline, err } + +func (api *API) PollInboxUpdates(cursor string) (APIInbox, error) { + url, err := url.Parse("https://twitter.com/i/api/1.1/dm/user_updates.json") + if err != nil { + panic(err) + } + query := url.Query() + query.Add("cursor", cursor) + query.Add("nsfw_filtering_enabled", "false") + query.Add("filter_low_quality", "true") + query.Add("include_quality", "all") + query.Add("dm_secret_conversations_enabled", "false") + query.Add("krs_registration_enabled", "true") + query.Add("cards_platform", "Web-12") + query.Add("include_cards", "1") + query.Add("include_ext_alt_text", "true") + query.Add("include_ext_limited_action_results", "true") + query.Add("include_quote_count", "true") + query.Add("include_reply_count", "1") + query.Add("tweet_mode", "extended") + query.Add("include_ext_views", "true") + query.Add("dm_users", "false") + query.Add("include_groups", "true") + query.Add("include_inbox_timelines", "true") + query.Add("include_ext_media_color", "true") + query.Add("supports_reactions", "true") + query.Add("include_ext_edit_control", "true") + query.Add("include_ext_business_affiliations_label", "true") + query.Add("ext", strings.Join([]string{ + "mediaColor", + "altText", + "businessAffiliationsLabel", + "mediaStats", + "highlightedLabel", + "hasNftAvatar", + "voiceInfo", + "birdwatchPivot", + "enrichments", + "superFollowMetadata", + "unmentionInfo", + "editControl", + "vibe", + }, ",")) + url.RawQuery = query.Encode() + + var result APIDMResponse + err = api.do_http(url.String(), "", &result) + return result.UserEvents, err +} diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go index 96a3fc2..99340e5 100644 --- a/pkg/scraper/api_types_dms_test.go +++ b/pkg/scraper/api_types_dms_test.go @@ -150,3 +150,24 @@ func TestParseDMRoomResponse(t *testing.T) { assert.Equal(room.ID, room_id) assert.Equal(trove.GetOldestMessage(room_id), DMMessageID(1663623062195957773)) } + +func TestParseInboxUpdates(t *testing.T) { + assert := assert.New(t) + data, err := os.ReadFile("test_responses/dms/user_updates_simulated.json") + require.NoError(t, err) + + var inbox APIDMResponse + err = json.Unmarshal(data, &inbox) + require.NoError(t, err) + + trove := inbox.UserEvents.ToDMTrove() + + assert.Len(trove.Messages, 2) // Should ignore stuff that isn't a message + + _, is_ok := trove.Messages[1725969457464447135] + assert.True(is_ok) + + message_receiving_a_reacc, is_ok := trove.Messages[1725980964718100721] + assert.True(is_ok) + assert.Len(message_receiving_a_reacc.Reactions, 1) +} diff --git a/pkg/scraper/dm_trove.go b/pkg/scraper/dm_trove.go index e9b1845..bafc88f 100644 --- a/pkg/scraper/dm_trove.go +++ b/pkg/scraper/dm_trove.go @@ -87,3 +87,16 @@ func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) DMTrove return trove } + +// Returns a DMTrove and the cursor for the next update +func PollInboxUpdates(cursor string) (DMTrove, string) { + if !the_api.IsAuthenticated { + log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`") + } + dm_response, err := the_api.PollInboxUpdates(cursor) + if err != nil { + panic(err) + } + + return dm_response.ToDMTrove(), dm_response.Cursor +} diff --git a/pkg/scraper/test_responses/dms/user_updates_simulated.json b/pkg/scraper/test_responses/dms/user_updates_simulated.json new file mode 100644 index 0000000..e7681e6 --- /dev/null +++ b/pkg/scraper/test_responses/dms/user_updates_simulated.json @@ -0,0 +1 @@ +{"user_events":{"min_entry_id":"1725969457464447135","max_entry_id":"1725969457464447135","cursor":"GRwmoMfercvZ7_MvFr7CvNX2hPDzLyUAAAA","last_seen_event_id":"1725968712917045712","trusted_last_seen_event_id":"1725968712917045712","untrusted_last_seen_event_id":"1718755749029990839","entries":[{"message":{"id":"1725969457464447135","time":"1700338160320","affects_sort":true,"request_id":"5A324046-2F82-45C2-A3DF-9A2D209523A1","conversation_id":"2694459866-1458284524761075714","message_data":{"id":"1725969457464447135","time":"1700338160000","recipient_id":"1458284524761075714","sender_id":"2694459866","text":"meow"}}},{"conversation_read":{"id":"1726008491389988864","time":"1700347466694","affects_sort":false,"conversation_id":"1458284524761075714-1635579077536145408","last_read_event_id":"1726003702677541072"}},{"reaction_create":{"id":"1725987444326363136","time":"1700342448691","affects_sort":true,"conversation_id":"2694459866-1458284524761075714","message_id":"1725980964718100721","reaction_key":"funny","emoji_reaction":"😂","sender_id":"1458284524761075714"}}]}}