Paginate the DM inbox fetch.

cmd/twitter/main.go: fetch_inbox now takes how_many and passes it on to
scraper.GetInbox. GetInbox returns two values after this patch (the trove and
a cursor string), so the caller discards the cursor explicitly.

pkg/scraper/api_types_dms.go: APIInbox gains Status, MinEntryID and
InboxTimelines.Trusted; APIDMResponse gains InboxTimeline; ToDMTrove moves
from APIDMResponse to APIInbox; GetDMInbox returns the inbox_initial_state
APIInbox with Status copied from InboxTimelines.Trusted.Status; the new
GetInboxTrusted requests inbox_timeline/trusted.json with max_id=oldest_id.

pkg/scraper/api_types_dms_test.go: TestParseInbox calls ToDMTrove on
inbox.InboxInitialState.

pkg/scraper/dm_trove.go: GetInbox(how_many) keeps calling GetInboxTrusted
while the trove holds fewer than how_many rooms and Status is not "AT_END",
then returns the merged trove and the cursor of the initial response.

diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index ae33445..bdfd3d3 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -160,7 +160,7 @@ func main() {
 	case "webserver":
 		start_webserver(*addr)
 	case "fetch_inbox":
-		fetch_inbox()
+		fetch_inbox(*how_many)
 	default:
 		die(fmt.Sprintf("Invalid operation: %s", operation), true, 3)
 	}
@@ -398,8 +398,8 @@ func start_webserver(addr string) {
 	app.Run(addr)
 }
 
-func fetch_inbox() {
-	trove := scraper.GetInbox()
+func fetch_inbox(how_many int) {
+	trove, _ := scraper.GetInbox(how_many) // second result (cursor for the next update) is unused here
 	profile.SaveDMTrove(trove)
 	happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)))
 }
diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go
index 6e76e54..b9ff3ee 100644
--- a/pkg/scraper/api_types_dms.go
+++ b/pkg/scraper/api_types_dms.go
@@ -48,10 +48,18 @@ type APIDMConversation struct {
 }
 
 type APIInbox struct {
+	Status          string `json:"status"`
+	MinEntryID      int    `json:"min_entry_id,string"`
 	LastSeenEventID int    `json:"last_seen_event_id,string"`
 	Cursor          string `json:"cursor"`
-	Entries         []struct {
-		Message APIDMMessage `json:"message"`
+	InboxTimelines  struct {
+		Trusted struct {
+			Status     string `json:"status"`
+			MinEntryID int    `json:"min_entry_id,string"`
+		} `json:"trusted"`
+	} `json:"inbox_timelines"`
+	Entries []struct {
+		Message APIDMMessage `json:"message"`
 	} `json:"entries"`
 	Users         map[string]APIUser           `json:"users"`
 	Conversations map[string]APIDMConversation `json:"conversations"`
@@ -59,20 +67,22 @@ type APIInbox struct {
 
 type APIDMResponse struct {
 	InboxInitialState APIInbox `json:"inbox_initial_state"`
+	InboxTimeline     APIInbox `json:"inbox_timeline"`
 }
 
-func (r APIDMResponse) ToDMTrove() DMTrove {
+func (r APIInbox) ToDMTrove() DMTrove {
 	ret := NewDMTrove()
-	for _, entry := range r.InboxInitialState.Entries {
+
+	for _, entry := range r.Entries {
 		result := ParseAPIDMMessage(entry.Message)
 		ret.Messages[result.ID] = result
 		// TODO: parse Tweet attachments
 	}
-	for _, room := range r.InboxInitialState.Conversations {
+	for _, room := range r.Conversations {
 		result := ParseAPIDMChatRoom(room)
 		ret.Rooms[result.ID] = result
 	}
-	for _, u := range r.InboxInitialState.Users {
+	for _, u := range r.Users {
 		result, err := ParseSingleUser(u)
 		if err != nil {
 			panic(err)
@@ -82,7 +92,7 @@ func (r APIDMResponse) ToDMTrove() DMTrove {
 	return ret
 }
 
-func (api *API) GetDMInbox() (APIDMResponse, error) {
+func (api *API) GetDMInbox() (APIInbox, error) {
 	url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_initial_state.json")
 	if err != nil {
 		panic(err)
@@ -138,5 +148,66 @@ func (api *API) GetDMInbox() (APIDMResponse, error) {
 
 	var result APIDMResponse
 	err = api.do_http(url.String(), "", &result)
-	return result, err
+	result.InboxInitialState.Status = result.InboxInitialState.InboxTimelines.Trusted.Status
+	return result.InboxInitialState, err
+}
+
+func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
+	url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_timeline/trusted.json")
+	if err != nil {
+		panic(err)
+	}
+	query := url.Query()
+	query.Add("max_id", fmt.Sprint(oldest_id))
+	query.Add("nsfw_filtering_enabled", "false")
+	query.Add("filter_low_quality", "true")
+	query.Add("include_quality", "all")
+	query.Add("include_profile_interstitial_type", "1")
+	query.Add("include_blocking", "1")
+	query.Add("include_blocked_by", "1")
+	query.Add("include_followed_by", "1")
+	query.Add("include_want_retweets", "1")
+	query.Add("include_mute_edge", "1")
+	query.Add("include_can_dm", "1")
+	query.Add("include_can_media_tag", "1")
+	query.Add("include_ext_has_nft_avatar", "1")
+	query.Add("include_ext_is_blue_verified", "1")
+	query.Add("include_ext_verified_type", "1")
+	query.Add("include_ext_profile_image_shape", "1")
+	query.Add("skip_status", "1")
+	query.Add("dm_secret_conversations_enabled", "false")
+	query.Add("krs_registration_enabled", "true")
+	query.Add("cards_platform", "Web-12")
+	query.Add("include_cards", "1")
+	query.Add("include_ext_alt_text", "true")
+	query.Add("include_ext_limited_action_results", "false")
+	query.Add("include_quote_count", "true")
+	query.Add("include_reply_count", "1")
+	query.Add("tweet_mode", "extended")
+	query.Add("include_ext_views", "true")
+	query.Add("dm_users", "true")
+	query.Add("include_groups", "true")
+	query.Add("include_inbox_timelines", "true")
+	query.Add("include_ext_media_color", "true")
+	query.Add("supports_reactions", "true")
+	query.Add("include_ext_edit_control", "true")
+	query.Add("ext", strings.Join([]string{
+		"mediaColor",
+		"altText",
+		"mediaStats",
+		"highlightedLabel",
+		"hasNftAvatar",
+		"voiceInfo",
+		"birdwatchPivot",
+		"enrichments",
+		"superFollowMetadata",
+		"unmentionInfo",
+		"editControl",
+		"vibe",
+	}, ","))
+	url.RawQuery = query.Encode()
+
+	var result APIDMResponse
+	err = api.do_http(url.String(), "", &result)
+	return result.InboxTimeline, err
 }
diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go
index f85842c..77eae48 100644
--- a/pkg/scraper/api_types_dms_test.go
+++ b/pkg/scraper/api_types_dms_test.go
@@ -99,7 +99,7 @@ func TestParseInbox(t *testing.T) {
 	err = json.Unmarshal(data, &inbox)
 	require.NoError(t, err)
 
-	trove := inbox.ToDMTrove()
+	trove := inbox.InboxInitialState.ToDMTrove()
 
 	for _, id := range []DMMessageID{1663623062195957773, 1663623203644751885, 1665922180176044037, 1665936253483614212} {
 		m, is_ok := trove.Messages[id]
diff --git a/pkg/scraper/dm_trove.go b/pkg/scraper/dm_trove.go
index bbc7a82..b6a1562 100644
--- a/pkg/scraper/dm_trove.go
+++ b/pkg/scraper/dm_trove.go
@@ -28,7 +28,8 @@ func (t1 *DMTrove) MergeWith(t2 DMTrove) {
 	t1.TweetTrove.MergeWith(t2.TweetTrove)
 }
 
-func GetInbox() DMTrove {
+// Returns a DMTrove and the cursor for the next update
+func GetInbox(how_many int) (DMTrove, string) {
 	if !the_api.IsAuthenticated {
 		log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
 	}
@@ -36,5 +37,19 @@ func GetInbox() DMTrove {
 	if err != nil {
 		panic(err)
 	}
-	return dm_response.ToDMTrove()
+
+	trove := dm_response.ToDMTrove()
+	cursor := dm_response.Cursor
+	next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
+	for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
+		dm_response, err = the_api.GetInboxTrusted(next_cursor_id)
+		if err != nil {
+			panic(err)
+		}
+		next_trove := dm_response.ToDMTrove()
+		next_cursor_id = dm_response.MinEntryID
+		trove.MergeWith(next_trove)
+	}
+
+	return trove, cursor
 }