Handle media download 404s gracefully
This commit is contained in:
parent
d0c23b392b
commit
8aca12695b
@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
||||
|
||||
if should_download {
|
||||
err = p.DownloadTweetContentFor(&t)
|
||||
if errors.Is(err, ErrRequestTimeout) {
|
||||
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
|
||||
// Forget about it; if it's important someone will try again
|
||||
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
||||
} else if err != nil {
|
||||
|
@ -11,6 +11,7 @@ var (
|
||||
ErrorIsTombstone = errors.New("tweet is a tombstone")
|
||||
ErrRateLimited = errors.New("rate limited")
|
||||
ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
|
||||
ErrMediaDownload404 = errors.New("media download HTTP 404")
|
||||
|
||||
// These are not API errors, but network errors generally
|
||||
ErrNoInternet = errors.New("no internet connection")
|
||||
|
@ -452,6 +452,11 @@ func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
|
||||
// Not a DMCA; fall through
|
||||
}
|
||||
|
||||
if resp.StatusCode == 404 {
|
||||
log.Debugf("Media download 404 (%s)", remote_url)
|
||||
return body, ErrMediaDownload404
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
url, err := url.Parse(remote_url)
|
||||
if err != nil {
|
||||
|
@ -2,10 +2,10 @@ package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"log"
|
||||
)
|
||||
|
||||
type Url struct {
|
||||
|
Loading…
x
Reference in New Issue
Block a user