diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index de7097a..9b69779 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -522,7 +522,7 @@ func download_tweet_content(tweet_identifier string) { if err != nil { panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err)) } - err = profile.DownloadTweetContentFor(&tweet, &api) + err = profile.DownloadTweetContentFor(&tweet, api.DownloadMedia) if err != nil { panic("Error getting content: " + err.Error()) } @@ -533,7 +533,7 @@ func download_user_content(handle scraper.UserHandle) { if err != nil { panic("Couldn't get the user from database: " + err.Error()) } - err = profile.DownloadUserContentFor(&user, &api) + err = profile.DownloadUserContentFor(&user, api.DownloadMedia) if err != nil { panic("Error getting content: " + err.Error()) } diff --git a/internal/webserver/handler_login.go b/internal/webserver/handler_login.go index 8be140c..6169c1e 100644 --- a/internal/webserver/handler_login.go +++ b/internal/webserver/handler_login.go @@ -76,7 +76,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api return } panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users - panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) + panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia)) // Now that the user is scraped for sure, set them as the logged-in user err = app.SetActiveUser(api.UserHandle) diff --git a/internal/webserver/handler_user_feed.go b/internal/webserver/handler_user_feed.go index a24e4e0..6d343bc 100644 --- a/internal/webserver/handler_user_feed.go +++ b/internal/webserver/handler_user_feed.go @@ -25,7 +25,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) { return } panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users - panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) + panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia)) } else if err != nil { panic(err) } @@ -50,7 +50,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) { user, err = app.API.GetUser(scraper.UserHandle(parts[0])) panic_if(err) panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users - panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) + panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia)) if len(parts) == 1 { // The URL is just the user handle // Run scraper diff --git a/pkg/persistence/compound_ssf_queries.go b/pkg/persistence/compound_ssf_queries.go index 6325923..853cbb0 100644 --- a/pkg/persistence/compound_ssf_queries.go +++ b/pkg/persistence/compound_ssf_queries.go @@ -133,9 +133,9 @@ const ( type CursorResult struct { Tweet Retweet - Chrono int `db:"chrono"` - LikeSortOrder int `db:"likes_sort_order"` - BookmarkSortOrder int `db:"bookmarks_sort_order"` + Chrono int `db:"chrono"` + LikeSortOrder int `db:"likes_sort_order"` + BookmarkSortOrder int `db:"bookmarks_sort_order"` ByUserID UserID `db:"by_user_id"` } diff --git a/pkg/persistence/media_download.go b/pkg/persistence/media_download.go index 5169e1e..dc14185 100644 --- a/pkg/persistence/media_download.go +++ b/pkg/persistence/media_download.go @@ -4,7 +4,7 @@ import ( "errors" "fmt" "os" - "path" + "path/filepath" . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" ) @@ -13,8 +13,10 @@ type MediaDownloader interface { Curl(url string, outpath string) error } +type DownloadFunc func(url string) ([]byte, error) + type DefaultDownloader struct { - *API + Download DownloadFunc } // Download a file over HTTP and save it. @@ -23,13 +25,13 @@ type DefaultDownloader struct { // - url: the remote file to download // - outpath: the path on disk to save it to func (d DefaultDownloader) Curl(url string, outpath string) error { - data, err := d.API.DownloadMedia(url) + data, err := d.Download(url) if err != nil { return fmt.Errorf("downloading %q:\n %w", url, err) } // Ensure the output directory exists - dirname := path.Dir(outpath) + dirname := filepath.Dir(outpath) if dirname != "." { err = os.MkdirAll(dirname, 0755) if err != nil { @@ -48,7 +50,7 @@ func (d DefaultDownloader) Curl(url string, outpath string) error { // Downloads an Image, and if successful, marks it as downloaded in the DB // DUPE: download-image func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error { - outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) + outfile := filepath.Join(p.ProfileDir, "images", img.LocalFilename) err := downloader.Curl(img.RemoteURL, outfile) if err != nil { return fmt.Errorf("Error downloading tweet image (TweetID %d):\n %w", img.TweetID, err) @@ -61,7 +63,7 @@ func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) er // DUPE: download-video func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error { // Download the video - outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename) + outfile := filepath.Join(p.ProfileDir, "videos", v.LocalFilename) err := downloader.Curl(v.RemoteURL, outfile) if errors.Is(err, ErrorDMCA) { @@ -74,7 +76,7 @@ func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) erro } // Download the thumbnail - outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath) + outfile = filepath.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath) err = downloader.Curl(v.ThumbnailRemoteUrl, outfile) if err != nil { v.IsDownloaded = false @@ -88,7 +90,7 @@ func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) erro // DUPE: download-link-thumbnail func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error { if url.HasCard && url.HasThumbnail { - outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) + outfile := filepath.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) if err != nil { return fmt.Errorf("Error downloading link thumbnail (TweetID %d):\n %w", url.TweetID, err) @@ -100,8 +102,8 @@ func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) e // Download a tweet's video and picture content. // Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader. -func (p Profile) DownloadTweetContentFor(t *Tweet, api *API) error { - return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api}) +func (p Profile) DownloadTweetContentFor(t *Tweet, download DownloadFunc) error { + return p.DownloadTweetContentWithInjector(t, DefaultDownloader{Download: download}) } // Enable injecting a custom MediaDownloader (i.e., for testing) @@ -141,8 +143,8 @@ func (p Profile) DownloadTweetContentWithInjector(t *Tweet, downloader MediaDown } // Download a user's banner and profile images -func (p Profile) DownloadUserContentFor(u *User, api *API) error { - return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api}) +func (p Profile) DownloadUserContentFor(u *User, download DownloadFunc) error { + return p.DownloadUserContentWithInjector(u, DefaultDownloader{Download: download}) } // Enable injecting a custom MediaDownloader (i.e., for testing) @@ -186,14 +188,14 @@ func (p Profile) DownloadUserContentWithInjector(u *User, downloader MediaDownlo // Download a User's tiny profile image, if it hasn't been downloaded yet. // If it has been downloaded, do nothing. // If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method. -func (p Profile) DownloadUserProfileImageTiny(u *User, api *API) error { +func (p Profile) DownloadUserProfileImageTiny(u *User, download DownloadFunc) error { if p.IsFollowing(*u) { - return p.DownloadUserContentFor(u, api) + return p.DownloadUserContentFor(u, download) } - d := DefaultDownloader{API: api} + d := DefaultDownloader{Download: download} - outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath()) + outfile := filepath.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath()) if file_exists(outfile) { return nil } diff --git a/pkg/persistence/tweet_trove_queries.go b/pkg/persistence/tweet_trove_queries.go index 7407e9a..01e958f 100644 --- a/pkg/persistence/tweet_trove_queries.go +++ b/pkg/persistence/tweet_trove_queries.go @@ -64,7 +64,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API if should_download { // Download their tiny profile image - err = p.DownloadUserProfileImageTiny(&u, api) + err = p.DownloadUserProfileImageTiny(&u, api.DownloadMedia) if errors.Is(err, ErrRequestTimeout) { // Forget about it; if it's important someone will try again fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error()) @@ -88,7 +88,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API } if should_download { - err = p.DownloadTweetContentFor(&t, api) + err = p.DownloadTweetContentFor(&t, api.DownloadMedia) if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) { // Forget about it; if it's important someone will try again fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error()) @@ -147,7 +147,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API // Download content if needed if should_download { - downloader := DefaultDownloader{API: api} + downloader := DefaultDownloader{Download: api.DownloadMedia} for _, img := range m.Images { // Check if it's already downloaded diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index a4689c5..2105f63 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -1,6 +1,7 @@ package scraper import ( + "bytes" "encoding/json" "errors" "fmt" @@ -10,7 +11,6 @@ import ( "net/http/cookiejar" "net/url" "strings" - "bytes" "time" log "github.com/sirupsen/logrus"