From c52960759e8d652cec99e39f17a0358c72b72165 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 28 Jul 2024 13:23:10 -0700 Subject: [PATCH] Fix bug caused by DMTrove refactor --- pkg/persistence/dm_trove_queries.go | 145 ------------------------- pkg/persistence/tweet_trove_queries.go | 132 ++++++++++++++++++++++ 2 files changed, 132 insertions(+), 145 deletions(-) delete mode 100644 pkg/persistence/dm_trove_queries.go diff --git a/pkg/persistence/dm_trove_queries.go b/pkg/persistence/dm_trove_queries.go deleted file mode 100644 index 2440804..0000000 --- a/pkg/persistence/dm_trove_queries.go +++ /dev/null @@ -1,145 +0,0 @@ -package persistence - -import ( - "errors" - "fmt" - "path" - - . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" -) - -// Convenience function that saves all the objects in a TweetTrove. -// Panics if anything goes wrong. -// -// TODO: a lot of this function contains duplicated code and should be extracted to functions -func (p Profile) SaveDMTrove(trove TweetTrove, should_download bool) { - p.SaveTweetTrove(trove, should_download) - - for _, r := range trove.Rooms { - err := p.SaveChatRoom(r) - if err != nil { - panic(fmt.Errorf("Error saving chat room: %#v\n %w", r, err)) - } - } - for _, m := range trove.Messages { - err := p.SaveChatMessage(m) - if err != nil { - panic(fmt.Errorf("Error saving chat message: %#v\n %w", m, err)) - } - - // TODO: all of this is very duplicated and should be refactored - // Copied from media_download.go functions: - // - download_tweet_image, download_tweet_video, download_link_thumbnail - // - DownloadTweetContentWithInjector - // Copied from tweet_queries.go functions: - // - CheckTweetContentDownloadNeeded - - // Download content if needed - if should_download { - downloader := DefaultDownloader{} - - for _, img := range m.Images { - // Check if it's already downloaded - var is_downloaded bool - err := p.DB.Get(&is_downloaded, `select is_downloaded from chat_message_images where id = ?`, img.ID) - if err != nil { - panic(err) - } - if is_downloaded { - // Already downloaded; skip - continue - } - - // DUPE: download-image - outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) - err = downloader.Curl(img.RemoteURL, outfile) - if errors.Is(err, ErrRequestTimeout) { - // Forget about it; if it's important someone will try again - fmt.Printf("Failed to download image %q: %s\n", img.RemoteURL, err.Error()) - } else if err != nil { - panic(fmt.Errorf("downloading image %q on DM message %d:\n %w", img.RemoteURL, m.ID, err)) - } - _, err = p.DB.NamedExec(`update chat_message_images set is_downloaded = 1 where id = :id`, img) - if err != nil { - panic(err) - } - } - - for _, vid := range m.Videos { - // Videos can be geoblocked, and the HTTP response isn't in JSON so it's hard to capture - if vid.IsGeoblocked { - continue - } - - // Check if it's already downloaded - var is_downloaded bool - err := p.DB.Get(&is_downloaded, `select is_downloaded from chat_message_videos where id = ?`, vid.ID) - if err != nil { - panic(err) - } - if is_downloaded { - // Already downloaded; skip - continue - } - - // DUPE: download-video - // Download the video - outfile := path.Join(p.ProfileDir, "videos", vid.LocalFilename) - err = downloader.Curl(vid.RemoteURL, outfile) - - if errors.Is(err, ErrRequestTimeout) { - // Forget about it; if it's important someone will try again - fmt.Printf("Failed to download video %q: %s\n", vid.RemoteURL, err.Error()) - } else if errors.Is(err, ErrorDMCA) { - vid.IsDownloaded = false - vid.IsBlockedByDMCA = true - } else if err != nil { - panic(fmt.Errorf("downloading video %q on DM message %d:\n %w", vid.RemoteURL, m.ID, err)) - } else { - vid.IsDownloaded = true - } - - // Download the thumbnail - outfile = path.Join(p.ProfileDir, "video_thumbnails", vid.ThumbnailLocalPath) - err = downloader.Curl(vid.ThumbnailRemoteUrl, outfile) - if errors.Is(err, ErrRequestTimeout) { - // Forget about it; if it's important someone will try again - fmt.Printf("Failed to download video thumbnail %q: %s\n", vid.ThumbnailRemoteUrl, err.Error()) - } else if err != nil { - panic(fmt.Errorf("Error downloading video thumbnail (DMMessageID %d):\n %w", vid.DMMessageID, err)) - } - - // Update it in the DB - _, err = p.DB.NamedExec(` - update chat_message_videos set is_downloaded = :is_downloaded, is_blocked_by_dmca = :is_blocked_by_dmca where id = :id - `, vid) - if err != nil { - panic(err) - } - } - - for _, url := range m.Urls { - // DUPE: download-link-thumbnail - if url.HasCard && url.HasThumbnail { - outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) - err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) - if errors.Is(err, ErrRequestTimeout) { - // Forget about it; if it's important someone will try again - fmt.Printf("Failed to download link thumbnail %q: %s\n", url.ThumbnailRemoteUrl, err.Error()) - } else if err != nil { - panic(fmt.Errorf("downloading link thumbnail %q on DM message %d:\n %w", url.ThumbnailRemoteUrl, m.ID, err)) - } - } - url.IsContentDownloaded = true - - // Update it in the DB - _, err = p.DB.NamedExec(` - update chat_message_urls set is_content_downloaded = :is_content_downloaded where chat_message_id = :chat_message_id - `, url) - if err != nil { - panic(err) - } - } - } - } -} diff --git a/pkg/persistence/tweet_trove_queries.go b/pkg/persistence/tweet_trove_queries.go index 9c14bcf..50e090f 100644 --- a/pkg/persistence/tweet_trove_queries.go +++ b/pkg/persistence/tweet_trove_queries.go @@ -3,6 +3,7 @@ package persistence import ( "errors" "fmt" + "path" . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" ) @@ -94,4 +95,135 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { panic(fmt.Errorf("Error saving Bookmark: %#v\n %w", b, err)) } } + + // DM related content + // ------------------ + + for _, r := range trove.Rooms { + err := p.SaveChatRoom(r) + if err != nil { + panic(fmt.Errorf("Error saving chat room: %#v\n %w", r, err)) + } + } + for _, m := range trove.Messages { + err := p.SaveChatMessage(m) + if err != nil { + panic(fmt.Errorf("Error saving chat message: %#v\n %w", m, err)) + } + + // TODO: all of this is very duplicated and should be refactored + // Copied from media_download.go functions: + // - download_tweet_image, download_tweet_video, download_link_thumbnail + // - DownloadTweetContentWithInjector + // Copied from tweet_queries.go functions: + // - CheckTweetContentDownloadNeeded + + // Download content if needed + if should_download { + downloader := DefaultDownloader{} + + for _, img := range m.Images { + // Check if it's already downloaded + var is_downloaded bool + err := p.DB.Get(&is_downloaded, `select is_downloaded from chat_message_images where id = ?`, img.ID) + if err != nil { + panic(err) + } + if is_downloaded { + // Already downloaded; skip + continue + } + + // DUPE: download-image + outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) + err = downloader.Curl(img.RemoteURL, outfile) + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download image %q: %s\n", img.RemoteURL, err.Error()) + } else if err != nil { + panic(fmt.Errorf("downloading image %q on DM message %d:\n %w", img.RemoteURL, m.ID, err)) + } + _, err = p.DB.NamedExec(`update chat_message_images set is_downloaded = 1 where id = :id`, img) + if err != nil { + panic(err) + } + } + + for _, vid := range m.Videos { + // Videos can be geoblocked, and the HTTP response isn't in JSON so it's hard to capture + if vid.IsGeoblocked { + continue + } + + // Check if it's already downloaded + var is_downloaded bool + err := p.DB.Get(&is_downloaded, `select is_downloaded from chat_message_videos where id = ?`, vid.ID) + if err != nil { + panic(err) + } + if is_downloaded { + // Already downloaded; skip + continue + } + + // DUPE: download-video + // Download the video + outfile := path.Join(p.ProfileDir, "videos", vid.LocalFilename) + err = downloader.Curl(vid.RemoteURL, outfile) + + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download video %q: %s\n", vid.RemoteURL, err.Error()) + } else if errors.Is(err, ErrorDMCA) { + vid.IsDownloaded = false + vid.IsBlockedByDMCA = true + } else if err != nil { + panic(fmt.Errorf("downloading video %q on DM message %d:\n %w", vid.RemoteURL, m.ID, err)) + } else { + vid.IsDownloaded = true + } + + // Download the thumbnail + outfile = path.Join(p.ProfileDir, "video_thumbnails", vid.ThumbnailLocalPath) + err = downloader.Curl(vid.ThumbnailRemoteUrl, outfile) + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download video thumbnail %q: %s\n", vid.ThumbnailRemoteUrl, err.Error()) + } else if err != nil { + panic(fmt.Errorf("Error downloading video thumbnail (DMMessageID %d):\n %w", vid.DMMessageID, err)) + } + + // Update it in the DB + _, err = p.DB.NamedExec(` + update chat_message_videos set is_downloaded = :is_downloaded, is_blocked_by_dmca = :is_blocked_by_dmca where id = :id + `, vid) + if err != nil { + panic(err) + } + } + + for _, url := range m.Urls { + // DUPE: download-link-thumbnail + if url.HasCard && url.HasThumbnail { + outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) + err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download link thumbnail %q: %s\n", url.ThumbnailRemoteUrl, err.Error()) + } else if err != nil { + panic(fmt.Errorf("downloading link thumbnail %q on DM message %d:\n %w", url.ThumbnailRemoteUrl, m.ID, err)) + } + } + url.IsContentDownloaded = true + + // Update it in the DB + _, err = p.DB.NamedExec(` + update chat_message_urls set is_content_downloaded = :is_content_downloaded where chat_message_id = :chat_message_id + `, url) + if err != nil { + panic(err) + } + } + } + } }