diff --git a/pkg/scraper/api_types_dms_test.go b/pkg/scraper/api_types_dms_test.go index 442d17e..cbc81f1 100644 --- a/pkg/scraper/api_types_dms_test.go +++ b/pkg/scraper/api_types_dms_test.go @@ -71,6 +71,7 @@ func TestParseAPIDMMessageWithEmbeddedTweet(t *testing.T) { assert.True(is_ok) assert.Equal(m.EmbeddedTweetID, TweetID(1665509126737129472)) assert.Equal(m.Text, "Check this out") + assert.Len(m.Urls, 0) // Embedded tweets should not parse as URLs assert.Len(trove.Tweets, 1) _, is_ok = trove.Tweets[TweetID(1665509126737129472)] @@ -96,6 +97,7 @@ func TestParseAPIDMMessageWithEmbeddedImage(t *testing.T) { assert.Len(trove.Messages, 1) m, is_ok := trove.Messages[DMMessageID(1766224476729995648)] assert.True(is_ok) + assert.Len(m.Urls, 0) // Embedded images should not parse as URLs // Check that the short-URL is stripped assert.Equal("A gastropub staffed by white college girls and the chefs are all Latino", m.Text) @@ -120,6 +122,7 @@ func TestParseAPIDMMessageWithEmbeddedVideo(t *testing.T) { assert.Len(trove.Messages, 1) m, is_ok := trove.Messages[DMMessageID(1766248283901776125)] assert.True(is_ok) + assert.Len(m.Urls, 0) // Embedded videos should not parse as URLs // Check the short-URL is stripped assert.Equal("", m.Text) diff --git a/pkg/scraper/dm_message.go b/pkg/scraper/dm_message.go index 9955388..23ab2cf 100644 --- a/pkg/scraper/dm_message.go +++ b/pkg/scraper/dm_message.go @@ -67,6 +67,20 @@ func ParseAPIDMMessage(message APIDMMessage) DMMessage { // Process URLs and link previews for _, url := range message.MessageData.Entities.URLs { + // Skip it if it's an embedded tweet + _, id, is_ok := TryParseTweetUrl(url.ExpandedURL) + if is_ok && id == TweetID(message.MessageData.Attachment.Tweet.Status.ID) { + continue + } + // Skip it if it's an embedded image + if message.MessageData.Attachment.Photo.URL == url.ShortenedUrl { + continue + } + // Skip it if it's an embedded video + if message.MessageData.Attachment.Video.URL == url.ShortenedUrl { + continue + } + var new_url Url if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl { if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {