diff --git a/pkg/persistence/tweet_trove_queries.go b/pkg/persistence/tweet_trove_queries.go
index bd86b2c..9c14bcf 100644
--- a/pkg/persistence/tweet_trove_queries.go
+++ b/pkg/persistence/tweet_trove_queries.go
@@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
 		if should_download {
 			err = p.DownloadTweetContentFor(&t)
-			if errors.Is(err, ErrRequestTimeout) {
+			if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
 				// Forget about it; if it's important someone will try again
 				fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
 			} else if err != nil {
diff --git a/pkg/scraper/api_errors.go b/pkg/scraper/api_errors.go
index ff7d717..a5ff6ea 100644
--- a/pkg/scraper/api_errors.go
+++ b/pkg/scraper/api_errors.go
@@ -5,12 +5,13 @@ import (
 )
 
 var (
-	END_OF_FEED        = errors.New("End of feed")
-	ErrDoesntExist     = errors.New("Doesn't exist")
-	EXTERNAL_API_ERROR = errors.New("Unexpected result from external API")
-	ErrorIsTombstone   = errors.New("tweet is a tombstone")
-	ErrRateLimited     = errors.New("rate limited")
-	ErrorDMCA          = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
+	END_OF_FEED         = errors.New("End of feed")
+	ErrDoesntExist      = errors.New("Doesn't exist")
+	EXTERNAL_API_ERROR  = errors.New("Unexpected result from external API")
+	ErrorIsTombstone    = errors.New("tweet is a tombstone")
+	ErrRateLimited      = errors.New("rate limited")
+	ErrorDMCA           = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
+	ErrMediaDownload404 = errors.New("media download HTTP 404")
 
 	// These are not API errors, but network errors generally
 	ErrNoInternet = errors.New("no internet connection")
diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go
index 2c18a5d..33f1b5a 100644
--- a/pkg/scraper/api_request_utils.go
+++ b/pkg/scraper/api_request_utils.go
@@ -452,6 +452,11 @@ func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
 		// Not a DCMA; fall through
 	}
 
+	if resp.StatusCode == 404 {
+		log.Debugf("Media download 404 (%s)", remote_url)
+		return body, ErrMediaDownload404
+	}
+
 	if resp.StatusCode != 200 {
 		url, err := url.Parse(remote_url)
 		if err != nil {
diff --git a/pkg/scraper/url.go b/pkg/scraper/url.go
index ab1beef..e8c2dde 100644
--- a/pkg/scraper/url.go
+++ b/pkg/scraper/url.go
@@ -2,10 +2,10 @@ package scraper
 
 import (
 	"fmt"
+	"log"
 	"net/url"
 	"path"
 	"regexp"
-	"log"
 )
 
 type Url struct {