Add 'fetch_dm' subcommand

This commit is contained in:
Alessio 2023-11-19 10:41:45 -08:00
parent a4c4dba5e0
commit ae01208cde
6 changed files with 145 additions and 2 deletions

View File

@ -377,6 +377,8 @@ tw fetch_inbox
# Expect at least one chat room to be present in the database
test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1"
# Expect at least 5 messages to be stored for this specific chat room
test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5"
# Test fetching a single DM conversation, identified by its chat room ID
tw fetch_dm "1458284524761075714-1488963321701171204"
# TODO: Maybe this file should be broken up into multiple test scripts

View File

@ -161,6 +161,8 @@ func main() {
start_webserver(*addr)
case "fetch_inbox":
fetch_inbox(*how_many)
case "fetch_dm":
fetch_dm(target, *how_many)
default:
die(fmt.Sprintf("Invalid operation: %s", operation), true, 3)
}
@ -403,3 +405,14 @@ func fetch_inbox(how_many int) {
profile.SaveDMTrove(trove)
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)))
}
// fetch_dm scrapes a single DM conversation and saves the result to the profile.
//
// args:
//   - id: the chat room ID; it is looked up in the profile first, and the lookup error (if any)
//     causes a panic
//   - how_many: forwarded to `scraper.GetConversation` as the desired number of messages
//
// On success it reports how many messages and chats were saved via `happy_exit`.
func fetch_dm(id string, how_many int) {
	chat_room, err := profile.GetChatRoom(scraper.DMChatRoomID(id))
	if err != nil {
		panic(err)
	}

	// Start from the largest signed value that fits in a `uint`-sized word, i.e. "no upper bound"
	newest_possible_id := scraper.DMMessageID(^uint(0) >> 1)
	conversation := scraper.GetConversation(chat_room.ID, newest_possible_id, how_many)
	profile.SaveDMTrove(conversation)

	summary := fmt.Sprintf("Saved %d messages from %d chats", len(conversation.Messages), len(conversation.Rooms))
	happy_exit(summary)
}

View File

@ -67,8 +67,9 @@ type APIInbox struct {
}
// APIDMResponse is the top-level JSON envelope used when decoding DM API responses.  Each field
// maps one top-level JSON key onto an APIInbox; a given response is expected to populate only the
// key(s) relevant to the endpoint that produced it, leaving the others as zero values.
//
// Every field is declared exactly once: duplicate field names in a struct are a compile error.
type APIDMResponse struct {
	InboxInitialState    APIInbox `json:"inbox_initial_state"`
	InboxTimeline        APIInbox `json:"inbox_timeline"`
	ConversationTimeline APIInbox `json:"conversation_timeline"` // Read by `GetDMConversation`
}
func (r APIInbox) ToDMTrove() DMTrove {
@ -212,3 +213,61 @@ func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
err = api.do_http(url.String(), "", &result)
return result.InboxTimeline, err
}
// GetDMConversation requests one page of a single DM conversation from the
// `dm/conversation/<id>.json` endpoint and returns the `conversation_timeline` part of the
// response.
//
// args:
//   - id: the chat room whose conversation should be fetched; it is path-escaped before being
//     placed in the URL
//   - max_id: sent as the `max_id` query parameter (presumably an upper bound on the message IDs
//     returned -- confirm against the API's behavior)
//
// returns: the decoded `ConversationTimeline` together with whatever error `do_http` reported.
// If the URL cannot be built, an empty APIInbox and the parse error are returned.
func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox, error) {
	// Escape the ID so that characters like `/`, `?` or `#` can't alter the path or inject a query
	// string.  The local is deliberately not named `url`, to avoid shadowing the package.
	endpoint, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + url.PathEscape(string(id)) + ".json")
	if err != nil {
		// This function already reports errors to its caller; no need to crash the process here
		return APIInbox{}, err
	}

	// `Encode` sorts parameters by key, so the order they're listed in here has no effect on the
	// resulting query string
	query := url.Values{
		"max_id":                             {fmt.Sprint(max_id)},
		"context":                            {"FETCH_DM_CONVERSATION_HISTORY"},
		"include_profile_interstitial_type":  {"1"},
		"include_blocking":                   {"1"},
		"include_blocked_by":                 {"1"},
		"include_followed_by":                {"1"},
		"include_want_retweets":              {"1"},
		"include_mute_edge":                  {"1"},
		"include_can_dm":                     {"1"},
		"include_can_media_tag":              {"1"},
		"include_ext_has_nft_avatar":         {"1"},
		"include_ext_is_blue_verified":       {"1"},
		"include_ext_verified_type":          {"1"},
		"include_ext_profile_image_shape":    {"1"},
		"skip_status":                        {"1"},
		"dm_secret_conversations_enabled":    {"false"},
		"krs_registration_enabled":           {"true"},
		"cards_platform":                     {"Web-12"},
		"include_cards":                      {"1"},
		"include_ext_alt_text":               {"true"},
		"include_ext_limited_action_results": {"true"},
		"include_quote_count":                {"true"},
		"include_reply_count":                {"1"},
		"tweet_mode":                         {"extended"},
		"include_ext_views":                  {"true"},
		"dm_users":                           {"false"},
		"include_groups":                     {"true"},
		"include_inbox_timelines":            {"true"},
		"include_ext_media_color":            {"true"},
		"supports_reactions":                 {"true"},
		"include_conversation_info":          {"true"},
		"ext": {strings.Join([]string{
			"mediaColor",
			"altText",
			"mediaStats",
			"highlightedLabel",
			"hasNftAvatar",
			"voiceInfo",
			"birdwatchPivot",
			"enrichments",
			"superFollowMetadata",
			"unmentionInfo",
			"editControl",
			"vibe",
		}, ",")},
	}
	endpoint.RawQuery = query.Encode()

	var result APIDMResponse
	err = api.do_http(endpoint.String(), "", &result)
	// The decoded timeline is returned alongside the error, so callers decide what to do with it
	return result.ConversationTimeline, err
}

View File

@ -116,3 +116,37 @@ func TestParseInbox(t *testing.T) {
assert.True(is_ok)
assert.Equal(room.ID, room_id)
}
// TestParseDMRoomResponse decodes a saved DM conversation API response and checks that converting
// its `conversation_timeline` into a DMTrove yields the expected messages, users and chat room,
// and that the oldest message in the room is identified correctly.
func TestParseDMRoomResponse(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/dms/dm_conversation_response.json")
	require.NoError(t, err)

	var inbox APIDMResponse
	err = json.Unmarshal(data, &inbox)
	require.NoError(t, err)

	trove := inbox.ConversationTimeline.ToDMTrove()

	// Every one of these messages should be present, keyed by its own ID
	for _, id := range []DMMessageID{
		1663623062195957773,
		1663623203644751885,
		1665922180176044037,
		1665936253483614212,
		1726009944393372005,
	} {
		m, is_ok := trove.Messages[id]
		// The `%d` verb needs its argument, otherwise a failure prints "%!d(MISSING)"
		assert.True(is_ok, "Message with ID %d not in the trove!", id)
		assert.Equal(m.ID, id)
	}

	// Both users should be present, keyed by their own IDs
	for _, id := range []UserID{1458284524761075714, 1488963321701171204} {
		u, is_ok := trove.TweetTrove.Users[id]
		assert.True(is_ok, "User with ID %d not in the trove!", id)
		assert.Equal(u.ID, id)
	}

	// The chat room itself should be present
	room_id := DMChatRoomID("1458284524761075714-1488963321701171204")
	room, is_ok := trove.Rooms[room_id]
	assert.True(is_ok)
	assert.Equal(room.ID, room_id)

	// The smallest message ID in the room is the oldest message
	assert.Equal(trove.GetOldestMessage(room_id), DMMessageID(1663623062195957773))
}

View File

@ -28,6 +28,16 @@ func (t1 *DMTrove) MergeWith(t2 DMTrove) {
t1.TweetTrove.MergeWith(t2.TweetTrove)
}
// GetOldestMessage scans the trove's messages and returns the smallest message ID among those
// belonging to the given chat room.
//
// If the trove holds no messages for that room, the starting sentinel (`^uint(0) >> 1`, the
// largest signed value that fits in a `uint`-sized word) is returned unchanged.
func (t DMTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
	lowest_id := DMMessageID(^uint(0) >> 1) // Sentinel; any qualifying message ID below it replaces it
	for _, msg := range t.Messages {
		if msg.DMChatRoomID != id {
			// Belongs to some other chat room
			continue
		}
		if msg.ID < lowest_id {
			lowest_id = msg.ID
		}
	}
	return lowest_id
}
// Returns a DMTrove and the cursor for the next update
func GetInbox(how_many int) (DMTrove, string) {
if !the_api.IsAuthenticated {
@ -53,3 +63,27 @@ func GetInbox(how_many int) (DMTrove, string) {
return trove, cursor
}
// GetConversation fetches messages from a single DM chat room, paging backwards through the
// conversation until one of the following happens:
//   - the trove holds at least `how_many` messages
//   - the API reports the status "AT_END"
//   - a page adds no new messages to the trove (guards against looping forever)
//
// args:
//   - id: the chat room to fetch
//   - max_id: the `max_id` used for the first request; subsequent requests use the oldest message
//     ID found in the most recently fetched page
//   - how_many: the desired number of messages; more may be returned, since whole pages are merged
//
// Terminates the program via `log.Fatalf` if the API isn't authenticated, and panics if any
// request returns an error.
func GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) DMTrove {
	if !the_api.IsAuthenticated {
		log.Fatalf("Fetching DMs can only be done when authenticated. Please provide `--session [user]`")
	}

	dm_response, err := the_api.GetDMConversation(id, max_id)
	if err != nil {
		panic(err)
	}
	trove := dm_response.ToDMTrove()
	oldest := trove.GetOldestMessage(id)

	for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
		dm_response, err = the_api.GetDMConversation(id, oldest)
		if err != nil {
			panic(err)
		}
		next_trove := dm_response.ToDMTrove()
		oldest = next_trove.GetOldestMessage(id)

		count_before := len(trove.Messages)
		trove.MergeWith(next_trove)
		if len(trove.Messages) == count_before {
			// No progress was made.  If the page had no messages for this room, `oldest` is now the
			// "max integer" sentinel, and requesting again would just restart from the newest
			// message, so stop rather than hammer the API with identical requests.
			break
		}
	}
	return trove
}

File diff suppressed because one or more lines are too long