REFACTOR: change media downloader dependency injection to take just the download function rather than a whole API object
This commit is contained in:
parent
ac763a97a6
commit
0c1d853f55
@ -522,7 +522,7 @@ func download_tweet_content(tweet_identifier string) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err))
|
panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err))
|
||||||
}
|
}
|
||||||
err = profile.DownloadTweetContentFor(&tweet, &api)
|
err = profile.DownloadTweetContentFor(&tweet, api.DownloadMedia)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic("Error getting content: " + err.Error())
|
panic("Error getting content: " + err.Error())
|
||||||
}
|
}
|
||||||
@ -533,7 +533,7 @@ func download_user_content(handle scraper.UserHandle) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic("Couldn't get the user from database: " + err.Error())
|
panic("Couldn't get the user from database: " + err.Error())
|
||||||
}
|
}
|
||||||
err = profile.DownloadUserContentFor(&user, &api)
|
err = profile.DownloadUserContentFor(&user, api.DownloadMedia)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic("Error getting content: " + err.Error())
|
panic("Error getting content: " + err.Error())
|
||||||
}
|
}
|
||||||
|
@ -76,7 +76,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
||||||
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
|
panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia))
|
||||||
|
|
||||||
// Now that the user is scraped for sure, set them as the logged-in user
|
// Now that the user is scraped for sure, set them as the logged-in user
|
||||||
err = app.SetActiveUser(api.UserHandle)
|
err = app.SetActiveUser(api.UserHandle)
|
||||||
|
@ -25,7 +25,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
||||||
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
|
panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia))
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -50,7 +50,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
|||||||
user, err = app.API.GetUser(scraper.UserHandle(parts[0]))
|
user, err = app.API.GetUser(scraper.UserHandle(parts[0]))
|
||||||
panic_if(err)
|
panic_if(err)
|
||||||
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
|
||||||
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
|
panic_if(app.Profile.DownloadUserContentFor(&user, app.API.DownloadMedia))
|
||||||
|
|
||||||
if len(parts) == 1 { // The URL is just the user handle
|
if len(parts) == 1 { // The URL is just the user handle
|
||||||
// Run scraper
|
// Run scraper
|
||||||
|
@ -4,7 +4,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path/filepath"
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
@ -13,8 +13,10 @@ type MediaDownloader interface {
|
|||||||
Curl(url string, outpath string) error
|
Curl(url string, outpath string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DownloadFunc fetches the remote resource at `url` and returns its raw bytes,
// or an error if the download failed.
//
// It is the only dependency that DefaultDownloader needs, so callers can pass a
// single function (e.g., an API's `DownloadMedia` method) rather than a whole API object.
type DownloadFunc func(url string) ([]byte, error)
|
||||||
|
|
||||||
type DefaultDownloader struct {
|
type DefaultDownloader struct {
|
||||||
*API
|
Download DownloadFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download a file over HTTP and save it.
|
// Download a file over HTTP and save it.
|
||||||
@ -23,13 +25,13 @@ type DefaultDownloader struct {
|
|||||||
// - url: the remote file to download
|
// - url: the remote file to download
|
||||||
// - outpath: the path on disk to save it to
|
// - outpath: the path on disk to save it to
|
||||||
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
||||||
data, err := d.API.DownloadMedia(url)
|
data, err := d.Download(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("downloading %q:\n %w", url, err)
|
return fmt.Errorf("downloading %q:\n %w", url, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure the output directory exists
|
// Ensure the output directory exists
|
||||||
dirname := path.Dir(outpath)
|
dirname := filepath.Dir(outpath)
|
||||||
if dirname != "." {
|
if dirname != "." {
|
||||||
err = os.MkdirAll(dirname, 0755)
|
err = os.MkdirAll(dirname, 0755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -48,7 +50,7 @@ func (d DefaultDownloader) Curl(url string, outpath string) error {
|
|||||||
// Downloads an Image, and if successful, marks it as downloaded in the DB
|
// Downloads an Image, and if successful, marks it as downloaded in the DB
|
||||||
// DUPE: download-image
|
// DUPE: download-image
|
||||||
func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error {
|
func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error {
|
||||||
outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
|
outfile := filepath.Join(p.ProfileDir, "images", img.LocalFilename)
|
||||||
err := downloader.Curl(img.RemoteURL, outfile)
|
err := downloader.Curl(img.RemoteURL, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error downloading tweet image (TweetID %d):\n %w", img.TweetID, err)
|
return fmt.Errorf("Error downloading tweet image (TweetID %d):\n %w", img.TweetID, err)
|
||||||
@ -61,7 +63,7 @@ func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) er
|
|||||||
// DUPE: download-video
|
// DUPE: download-video
|
||||||
func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error {
|
func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error {
|
||||||
// Download the video
|
// Download the video
|
||||||
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
outfile := filepath.Join(p.ProfileDir, "videos", v.LocalFilename)
|
||||||
err := downloader.Curl(v.RemoteURL, outfile)
|
err := downloader.Curl(v.RemoteURL, outfile)
|
||||||
|
|
||||||
if errors.Is(err, ErrorDMCA) {
|
if errors.Is(err, ErrorDMCA) {
|
||||||
@ -74,7 +76,7 @@ func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Download the thumbnail
|
// Download the thumbnail
|
||||||
outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
|
outfile = filepath.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath)
|
||||||
err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
|
err = downloader.Curl(v.ThumbnailRemoteUrl, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
v.IsDownloaded = false
|
v.IsDownloaded = false
|
||||||
@ -88,7 +90,7 @@ func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) erro
|
|||||||
// DUPE: download-link-thumbnail
|
// DUPE: download-link-thumbnail
|
||||||
func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error {
|
func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error {
|
||||||
if url.HasCard && url.HasThumbnail {
|
if url.HasCard && url.HasThumbnail {
|
||||||
outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
|
outfile := filepath.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
|
||||||
err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
|
err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error downloading link thumbnail (TweetID %d):\n %w", url.TweetID, err)
|
return fmt.Errorf("Error downloading link thumbnail (TweetID %d):\n %w", url.TweetID, err)
|
||||||
@ -100,8 +102,8 @@ func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) e
|
|||||||
|
|
||||||
// Download a tweet's video and picture content.
|
// Download a tweet's video and picture content.
|
||||||
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
|
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
|
||||||
func (p Profile) DownloadTweetContentFor(t *Tweet, api *API) error {
|
func (p Profile) DownloadTweetContentFor(t *Tweet, download DownloadFunc) error {
|
||||||
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
|
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{Download: download})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||||
@ -141,8 +143,8 @@ func (p Profile) DownloadTweetContentWithInjector(t *Tweet, downloader MediaDown
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Download a user's banner and profile images
|
// Download a user's banner and profile images
|
||||||
func (p Profile) DownloadUserContentFor(u *User, api *API) error {
|
func (p Profile) DownloadUserContentFor(u *User, download DownloadFunc) error {
|
||||||
return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
|
return p.DownloadUserContentWithInjector(u, DefaultDownloader{Download: download})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||||
@ -186,14 +188,14 @@ func (p Profile) DownloadUserContentWithInjector(u *User, downloader MediaDownlo
|
|||||||
// Download a User's tiny profile image, if it hasn't been downloaded yet.
|
// Download a User's tiny profile image, if it hasn't been downloaded yet.
|
||||||
// If it has been downloaded, do nothing.
|
// If it has been downloaded, do nothing.
|
||||||
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
|
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
|
||||||
func (p Profile) DownloadUserProfileImageTiny(u *User, api *API) error {
|
func (p Profile) DownloadUserProfileImageTiny(u *User, download DownloadFunc) error {
|
||||||
if p.IsFollowing(*u) {
|
if p.IsFollowing(*u) {
|
||||||
return p.DownloadUserContentFor(u, api)
|
return p.DownloadUserContentFor(u, download)
|
||||||
}
|
}
|
||||||
|
|
||||||
d := DefaultDownloader{API: api}
|
d := DefaultDownloader{Download: download}
|
||||||
|
|
||||||
outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath())
|
outfile := filepath.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath())
|
||||||
if file_exists(outfile) {
|
if file_exists(outfile) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -64,7 +64,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
|
|||||||
|
|
||||||
if should_download {
|
if should_download {
|
||||||
// Download their tiny profile image
|
// Download their tiny profile image
|
||||||
err = p.DownloadUserProfileImageTiny(&u, api)
|
err = p.DownloadUserProfileImageTiny(&u, api.DownloadMedia)
|
||||||
if errors.Is(err, ErrRequestTimeout) {
|
if errors.Is(err, ErrRequestTimeout) {
|
||||||
// Forget about it; if it's important someone will try again
|
// Forget about it; if it's important someone will try again
|
||||||
fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error())
|
fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error())
|
||||||
@ -88,7 +88,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
|
|||||||
}
|
}
|
||||||
|
|
||||||
if should_download {
|
if should_download {
|
||||||
err = p.DownloadTweetContentFor(&t, api)
|
err = p.DownloadTweetContentFor(&t, api.DownloadMedia)
|
||||||
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
|
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
|
||||||
// Forget about it; if it's important someone will try again
|
// Forget about it; if it's important someone will try again
|
||||||
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
||||||
@ -147,7 +147,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
|
|||||||
|
|
||||||
// Download content if needed
|
// Download content if needed
|
||||||
if should_download {
|
if should_download {
|
||||||
downloader := DefaultDownloader{API: api}
|
downloader := DefaultDownloader{Download: api.DownloadMedia}
|
||||||
|
|
||||||
for _, img := range m.Images {
|
for _, img := range m.Images {
|
||||||
// Check if it's already downloaded
|
// Check if it's already downloaded
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -10,7 +11,6 @@ import (
|
|||||||
"net/http/cookiejar"
|
"net/http/cookiejar"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"bytes"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user