diff --git a/pkg/persistence/dm_trove_queries.go b/pkg/persistence/dm_trove_queries.go index ec1f029..013d054 100644 --- a/pkg/persistence/dm_trove_queries.go +++ b/pkg/persistence/dm_trove_queries.go @@ -53,7 +53,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) { // DUPE: download-image outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) err = downloader.Curl(img.RemoteURL, outfile) - if err != nil { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download image %q: %s\n", img.RemoteURL, err.Error()) + } else if err != nil { panic(fmt.Errorf("downloading image %q on DM message %d:\n %w", img.RemoteURL, m.ID, err)) } _, err = p.DB.NamedExec(`update chat_message_images set is_downloaded = 1 where id = :id`, img) @@ -84,7 +87,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) { outfile := path.Join(p.ProfileDir, "videos", vid.LocalFilename) err = downloader.Curl(vid.RemoteURL, outfile) - if errors.Is(err, ErrorDMCA) { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download video %q: %s\n", vid.RemoteURL, err.Error()) + } else if errors.Is(err, ErrorDMCA) { vid.IsDownloaded = false vid.IsBlockedByDMCA = true } else if err != nil { @@ -96,7 +102,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) { // Download the thumbnail outfile = path.Join(p.ProfileDir, "video_thumbnails", vid.ThumbnailLocalPath) err = downloader.Curl(vid.ThumbnailRemoteUrl, outfile) - if err != nil { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download video thumbnail %q: %s\n", vid.ThumbnailRemoteUrl, err.Error()) + } else if err != nil { panic(fmt.Errorf("Error downloading video thumbnail (DMMessageID %d):\n %w", vid.DMMessageID, err)) } @@ -114,7 +123,10 @@ func (p Profile) 
SaveDMTrove(trove DMTrove, should_download bool) { if url.HasCard && url.HasThumbnail { outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) - if err != nil { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download link thumbnail %q: %s\n", url.ThumbnailRemoteUrl, err.Error()) + } else if err != nil { panic(fmt.Errorf("downloading link thumbnail %q on DM message %d:\n %w", url.ThumbnailRemoteUrl, m.ID, err)) } } diff --git a/pkg/persistence/tweet_trove_queries.go b/pkg/persistence/tweet_trove_queries.go index 60d530e..9eed8c4 100644 --- a/pkg/persistence/tweet_trove_queries.go +++ b/pkg/persistence/tweet_trove_queries.go @@ -2,6 +2,7 @@ package persistence import ( "fmt" + "errors" . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" ) @@ -40,7 +41,10 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { if should_download { // Download their tiny profile image err = p.DownloadUserProfileImageTiny(&u) - if err != nil { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error()) + } else if err != nil { panic(fmt.Errorf("Error downloading user content for user with ID %d and handle %s:\n %w", u.ID, u.Handle, err)) } } @@ -61,7 +65,10 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { if should_download { err = p.DownloadTweetContentFor(&t) - if err != nil { + if errors.Is(err, ErrRequestTimeout) { + // Forget about it; if it's important someone will try again + fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error()) + } else if err != nil { panic(fmt.Errorf("Error downloading tweet content for tweet ID %d:\n %w", t.ID, err)) } } diff --git a/pkg/scraper/api_errors.go 
b/pkg/scraper/api_errors.go index 4acd14b..1d2052a 100644 --- a/pkg/scraper/api_errors.go +++ b/pkg/scraper/api_errors.go @@ -11,4 +11,5 @@ var ( ErrorIsTombstone = errors.New("tweet is a tombstone") ErrRateLimited = errors.New("rate limited") ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)") + ErrRequestTimeout = errors.New("request timed out") ) diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index 85b18e4..721236c 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -144,8 +144,8 @@ func (api *API) update_csrf_token() { } } -func (api *API) do_http_POST(url string, body string, result interface{}) error { - req, err := http.NewRequest("POST", url, strings.NewReader(body)) +func (api *API) do_http_POST(remote_url string, body string, result interface{}) error { + req, err := http.NewRequest("POST", remote_url, strings.NewReader(body)) if err != nil { return fmt.Errorf("Error initializing HTTP POST request:\n %w", err) } @@ -161,7 +161,9 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error log.Debug(" " + body) resp, err := api.Client.Do(req) - if err != nil { + if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() { + return fmt.Errorf("POST %q:\n %w", remote_url, ErrRequestTimeout) + } else if err != nil { return fmt.Errorf("Error executing HTTP POST request:\n %w", err) } api.update_csrf_token() @@ -190,8 +192,8 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error return nil } -func (api *API) do_http(url string, cursor string, result interface{}) error { - req, err := http.NewRequest("GET", url, nil) +func (api *API) do_http(remote_url string, cursor string, result interface{}) error { + req, err := http.NewRequest("GET", remote_url, nil) if err != nil { return fmt.Errorf("Error initializing HTTP GET request:\n %w", err) } @@ -210,7 +212,9 @@ func (api *API) do_http(url string, cursor string, 
result interface{}) error { } resp, err := api.Client.Do(req) - if err != nil { + if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() { + return fmt.Errorf("GET %q:\n %w", remote_url, ErrRequestTimeout) + } else if err != nil { return fmt.Errorf("Error executing HTTP request:\n %w", err) } defer resp.Body.Close() @@ -386,7 +390,9 @@ func (api *API) DownloadMedia(remote_url string) ([]byte, error) { // req.Header.Set("Referer", "https://twitter.com/") // DM embedded images require this header resp, err := api.Client.Do(req) - if err != nil { + if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() { + return []byte{}, fmt.Errorf("GET %q:\n %w", remote_url, ErrRequestTimeout) + } else if err != nil { return []byte{}, fmt.Errorf("Error executing HTTP request:\n %w", err) } defer resp.Body.Close()