diff --git a/pkg/persistence/dm_queries.go b/pkg/persistence/dm_queries.go index fdb1922..09dd8d3 100644 --- a/pkg/persistence/dm_queries.go +++ b/pkg/persistence/dm_queries.go @@ -101,8 +101,8 @@ func (p Profile) GetChatRoom(id DMChatRoomID) (ret DMChatRoom, err error) { func (p Profile) SaveChatMessage(m DMMessage) error { _, err := p.DB.NamedExec(` - insert into chat_messages (id, chat_room_id, sender_id, sent_at, request_id, in_reply_to_id, text) - values (:id, :chat_room_id, :sender_id, :sent_at, :request_id, :in_reply_to_id, :text) + insert into chat_messages (id, chat_room_id, sender_id, sent_at, request_id, in_reply_to_id, text, embedded_tweet_id) + values (:id, :chat_room_id, :sender_id, :sent_at, :request_id, :in_reply_to_id, :text, :embedded_tweet_id) on conflict do nothing `, m, ) @@ -127,7 +127,7 @@ func (p Profile) SaveChatMessage(m DMMessage) error { func (p Profile) GetChatMessage(id DMMessageID) (ret DMMessage, err error) { err = p.DB.Get(&ret, ` - select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id + select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id, embedded_tweet_id from chat_messages where id = ? `, id, @@ -186,7 +186,7 @@ func (p Profile) GetChatRoomsPreview(id UserID) DMChatView { // Fetch the latest message var msg DMMessage q, args, err := sqlx.Named(` - select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id + select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id, embedded_tweet_id from chat_messages where chat_room_id = :room_id and sent_at = (select max(sent_at) from chat_messages where chat_room_id = :room_id) @@ -271,7 +271,7 @@ func (p Profile) GetChatRoomContents(id DMChatRoomID) DMChatView { // Fetch all messages var msgs []DMMessage err = p.DB.Select(&msgs, ` - select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id + select id, chat_room_id, sender_id, sent_at, request_id, text, in_reply_to_id, embedded_tweet_id from chat_messages where chat_room_id = :room_id order by sent_at desc @@ -313,5 +313,29 @@ func (p Profile) GetChatRoomContents(id DMChatRoomID) DMChatView { ret.Messages[reacc.DMMessageID] = msg } + // Fetch all embedded tweets + embedded_tweet_ids := []interface{}{} + for _, m := range ret.Messages { + if m.EmbeddedTweetID != 0 { + embedded_tweet_ids = append(embedded_tweet_ids, m.EmbeddedTweetID) + } + } + if len(embedded_tweet_ids) > 0 { + var embedded_tweets []Tweet + err = p.DB.Select(&embedded_tweets, ` + select `+TWEETS_ALL_SQL_FIELDS+` + from tweets + left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid + left join likes on tweets.id = likes.tweet_id and likes.user_id = ? + where id in (`+strings.Repeat("?,", len(embedded_tweet_ids)-1)+`?)`, + append([]interface{}{UserID(0)}, embedded_tweet_ids...)...) + if err != nil { + panic(err) + } + for _, t := range embedded_tweets { + ret.Tweets[t.ID] = t + } + } + return ret } diff --git a/pkg/persistence/dm_queries_test.go b/pkg/persistence/dm_queries_test.go index c7df087..dcb249a 100644 --- a/pkg/persistence/dm_queries_test.go +++ b/pkg/persistence/dm_queries_test.go @@ -221,4 +221,14 @@ func TestGetChatRoomContents(t *testing.T) { reacc, is_ok := msg_with_reacc.Reactions[UserID(1458284524761075714)] require.True(is_ok) assert.Equal(reacc.Emoji, "😂") + + // Embedded tweets + require.Len(chat_view.Tweets, 1) + twt, is_ok := chat_view.Tweets[TweetID(1665509126737129472)] + require.True(is_ok) + assert.Equal(twt.InReplyToID, TweetID(1665505986184900611)) + assert.Equal(twt.NumLikes, 7) + u, is_ok := chat_view.Users[twt.UserID] + require.True(is_ok) + assert.Equal(u.Location, "on my computer") } diff --git a/pkg/persistence/schema.sql b/pkg/persistence/schema.sql index f85b175..08939a0 100644 --- a/pkg/persistence/schema.sql +++ b/pkg/persistence/schema.sql @@ -242,6 +242,7 @@ create table chat_messages (rowid integer primary key, request_id text not null, in_reply_to_id integer, text text not null, + embedded_tweet_id integer not null default 0, foreign key(chat_room_id) references chat_rooms(id) foreign key(sender_id) references users(id) ); diff --git a/pkg/scraper/api_types_dms.go b/pkg/scraper/api_types_dms.go index 76833ec..d1be2de 100644 --- a/pkg/scraper/api_types_dms.go +++ b/pkg/scraper/api_types_dms.go @@ -26,13 +26,60 @@ type APIDMMessage struct { ReplyData struct { ID int `json:"id,string"` } `json:"reply_data"` + Urls []struct { + Url string `json:"url"` + Indices []int `json:"indices"` + } `json:"urls"` Attachment struct { - Tweet APITweet `json:"tweet"` + Tweet struct { + Url string `json:"url"` + Status struct { + APITweet + User APIUser `json:"user"` + } `json:"status"` + } `json:"tweet"` } `json:"attachment"` } `json:"message_data"` MessageReactions []APIDMReaction `json:"message_reactions"` } +// Remove embedded tweet short-URLs +func (m *APIDMMessage) NormalizeContent() { + if m.MessageData.Attachment.Tweet.Status.ID != 0 { + m.MessageData.Text = strings.Replace(m.MessageData.Text, m.MessageData.Attachment.Tweet.Url, "", 1) + } + m.MessageData.Text = strings.TrimSpace(m.MessageData.Text) +} + +func (m APIDMMessage) ToDMTrove() DMTrove { + ret := NewDMTrove() + + m.NormalizeContent() + result := ParseAPIDMMessage(m) + + // Parse tweet attachment + if m.MessageData.Attachment.Tweet.Status.ID != 0 { + u, err := ParseSingleUser(m.MessageData.Attachment.Tweet.Status.User) + if err != nil { + panic(err) + } + ret.Users[u.ID] = u + + t, err := ParseSingleTweet(m.MessageData.Attachment.Tweet.Status.APITweet) + if err != nil { + panic(err) + } + t.UserID = u.ID + ret.Tweets[t.ID] = t + result.EmbeddedTweetID = t.ID + } + ret.Messages[result.ID] = result + + // TODO: parse attached images + + return ret +} + type APIDMConversation struct { ConversationID string `json:"conversation_id"` Type string `json:"type"` @@ -126,13 +173,14 @@ func (r APIInbox) ToDMTrove() DMTrove { entry.Message.ID = entry.ReactionCreate.MessageID entry.Message.MessageReactions = []APIDMReaction{entry.ReactionCreate} } - result := ParseAPIDMMessage(entry.Message) - if _, is_ok := ret.Messages[result.ID]; is_ok { - // No clobbering - panic("Already in the trove: " + fmt.Sprint(result.ID)) - } - ret.Messages[result.ID] = result - // TODO: parse Tweet attachments + + // TODO: + // if _, is_ok := ret.Messages[result.ID]; is_ok { + // // No clobbering + // panic("Already in the trove: " + fmt.Sprint(result.ID)) + // } + + ret.MergeWith(entry.Message.ToDMTrove()) } for _, room := range r.Conversations { result := ParseAPIDMChatRoom(room) diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go index 8198160..8238de3 100644 --- a/pkg/scraper/api_types_dms_test.go +++ b/pkg/scraper/api_types_dms_test.go @@ -54,6 +54,33 @@ func TestParseAPIDMMessageWithReaction(t *testing.T) { assert.Equal(reacc.Emoji, "😂") } +func TestParseAPIDMMessageWithEmbeddedTweet(t *testing.T) { + assert := assert.New(t) + data, err := os.ReadFile("test_responses/dms/dm_message_with_tweet_attachment.json") + if err != nil { + panic(err) + } + var api_message APIDMMessage + err = json.Unmarshal(data, &api_message) + require.NoError(t, err) + + trove := api_message.ToDMTrove() + + assert.Len(trove.Messages, 1) + m, is_ok := trove.Messages[DMMessageID(1665936253483614212)] + assert.True(is_ok) + assert.Equal(m.EmbeddedTweetID, TweetID(1665509126737129472)) + assert.Equal(m.Text, "Check this out") + + assert.Len(trove.Tweets, 1) + _, is_ok = trove.Tweets[TweetID(1665509126737129472)] + assert.True(is_ok) + + assert.Len(trove.Users, 1) + _, is_ok = trove.Users[UserID(1458284524761075714)] + assert.True(is_ok) +} + func TestParseAPIDMConversation(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/dms/dm_chat_room.json") diff --git a/pkg/scraper/dm_message.go b/pkg/scraper/dm_message.go index 3fab350..fedbc68 100644 --- a/pkg/scraper/dm_message.go +++ b/pkg/scraper/dm_message.go @@ -20,14 +20,15 @@ func ParseAPIDMReaction(reacc APIDMReaction) DMReaction { } type DMMessage struct { - ID DMMessageID `db:"id"` - DMChatRoomID DMChatRoomID `db:"chat_room_id"` - SenderID UserID `db:"sender_id"` - SentAt Timestamp `db:"sent_at"` - RequestID string `db:"request_id"` - Text string `db:"text"` - InReplyToID DMMessageID `db:"in_reply_to_id"` - Reactions map[UserID]DMReaction + ID DMMessageID `db:"id"` + DMChatRoomID DMChatRoomID `db:"chat_room_id"` + SenderID UserID `db:"sender_id"` + SentAt Timestamp `db:"sent_at"` + RequestID string `db:"request_id"` + Text string `db:"text"` + InReplyToID DMMessageID `db:"in_reply_to_id"` + EmbeddedTweetID TweetID `db:"embedded_tweet_id"` + Reactions map[UserID]DMReaction } func ParseAPIDMMessage(message APIDMMessage) DMMessage { diff --git a/sample_data/seed_data.sql b/sample_data/seed_data.sql index f21c54d..b538f8e 100644 --- a/sample_data/seed_data.sql +++ b/sample_data/seed_data.sql @@ -168,7 +168,8 @@ INSERT INTO tweets VALUES (1408673,1698765208393576891,1458284524761075714,'I appreciate the C preprocessor for this cutting insight',1693852161,0,0,0,0,1698764077458202845,1620206875450167296,'EKokemoor','EKokemoor','',NULL,NULL,0,1,0,0,0), (1409940,1698797388914151523,1458284524761075714,replace('This looks quite neat, but "comptime" is cool because it was designed to do stuff like declaring arrays where the size is the result of a function call, e.g.\n\nvar my_array: [fibonacci(10)]u32;\n\n...yet being able to create DSLs just emerged from this very simple concept','\n',char(10)),1693859834,2,0,1,0,1698792233619562866,0,'ilyakooo0','ilyakooo0','',NULL,NULL,0,1,0,0,0), (1409953,1698802806096846909,1689006330235760640,replace('Just poking around at some examples and explanation videos, It does seem very similar to Template Haskell, though maybe a bit more ergonomic.\n\nIs there something missing from this mental model?','\n',char(10)),1693861125,3,0,1,0,1698797388914151523,0,'wispem_wantex,ilyakooo0','wispem_wantex,ilyakooo0','',NULL,NULL,0,1,0,0,0), - (1411566,1698848086880133147,1458284524761075714,'I have basically no experience with one and literally no experience with the other, and additionally I''ve never even used Haskell. So unfortunately I''m not really in a position to say.',1693871921,1,0,1,0,1698802806096846909,0,'sol_plunder,ilyakooo0','sol_plunder,ilyakooo0','',NULL,NULL, 0,1,0,0,0); + (1411566,1698848086880133147,1458284524761075714,'I have basically no experience with one and literally no experience with the other, and additionally I''ve never even used Haskell. So unfortunately I''m not really in a position to say.',1693871921,1,0,1,0,1698802806096846909,0,'sol_plunder,ilyakooo0','sol_plunder,ilyakooo0','',NULL,NULL, 0,1,0,0,0), + (1169437,1665509126737129472,1458284524761075714,replace('Btw, to the extent this has happened, it''s partly thanks to the Golden One (@TheGloriousLion) who invented #fizeekfriday and the "post physique" rejoinder. Everyone should follow him if they don''t already.\n\nSince I forgot last week, and since it''s topical, here''s a leg poast','\n',char(10)),1685923294,7,0,0,0,1665505986184900611,0,'TheGloriousLion','','fizeekfriday',NULL,NULL,0,1,0,0,0); CREATE TABLE retweets(rowid integer primary key, retweet_id integer not null unique, @@ -377,19 +378,20 @@ create table chat_messages (rowid integer primary key, request_id text not null, in_reply_to_id integer, text text not null, + embedded_tweet_id integer not null default 0, foreign key(chat_room_id) references chat_rooms(id) foreign key(sender_id) references users(id) ); INSERT INTO chat_messages VALUES - (1,1663623062195957773,'1458284524761075714-1488963321701171204',1488963321701171204,1685473621419,'',0,'Yes helo'), - (2,1663623203644751885,'1458284524761075714-1488963321701171204',1458284524761075714,1685473655064,'',0,'Yeah i know who you are lol'), - (3,1665922180176044037,'1458284524761075714-1488963321701171204',1458284524761075714,1686021773787,'',1663623062195957773,'Yes?'), - (4,1665936253483614212,'1458284524761075714-1488963321701171204',1458284524761075714,1686025129132,'',0,replace('Check this out\nhttps://t.co/rHeWGgNIZ1','\n',char(10))), - (5,1665936253483614213,'1488963321701171204-1178839081222115328',1488963321701171204,1686025129140,'',0,'bruh1'), - (6,1665936253483614214,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129141,'',0,'bruh2'), - (7,1665936253483614215,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129142,'',1665936253483614214,'replying to bruh2'), - (8,1665936253483614216,'1488963321701171204-1178839081222115328',1488963321701171204,1686025129143,'',0,'This conversation is totally fake lol'), - (9,1665936253483614217,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129144,'',0,'exactly'); + (1,1663623062195957773,'1458284524761075714-1488963321701171204',1488963321701171204,1685473621419,'',0,'Yes helo',0), + (2,1663623203644751885,'1458284524761075714-1488963321701171204',1458284524761075714,1685473655064,'',0,'Yeah i know who you are lol',0), + (3,1665922180176044037,'1458284524761075714-1488963321701171204',1458284524761075714,1686021773787,'',1663623062195957773,'Yes?',0), + (4,1665936253483614212,'1458284524761075714-1488963321701171204',1458284524761075714,1686025129132,'',0,replace('Check this out\nhttps://t.co/rHeWGgNIZ1','\n',char(10)),1665509126737129472), + (5,1665936253483614213,'1488963321701171204-1178839081222115328',1488963321701171204,1686025129140,'',0,'bruh1',0), + (6,1665936253483614214,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129141,'',0,'bruh2',0), + (7,1665936253483614215,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129142,'',1665936253483614214,'replying to bruh2',0), + (8,1665936253483614216,'1488963321701171204-1178839081222115328',1488963321701171204,1686025129143,'',0,'This conversation is totally fake lol',0), + (9,1665936253483614217,'1488963321701171204-1178839081222115328',1178839081222115328,1686025129144,'',0,'exactly',0);