Handle media download 404s gracefully
This commit is contained in:
parent
d0c23b392b
commit
8aca12695b
@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
|||||||
|
|
||||||
if should_download {
|
if should_download {
|
||||||
err = p.DownloadTweetContentFor(&t)
|
err = p.DownloadTweetContentFor(&t)
|
||||||
if errors.Is(err, ErrRequestTimeout) {
|
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
|
||||||
// Forget about it; if it's important someone will try again
|
// Forget about it; if it's important someone will try again
|
||||||
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
@ -5,12 +5,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
END_OF_FEED = errors.New("End of feed")
|
END_OF_FEED = errors.New("End of feed")
|
||||||
ErrDoesntExist = errors.New("Doesn't exist")
|
ErrDoesntExist = errors.New("Doesn't exist")
|
||||||
EXTERNAL_API_ERROR = errors.New("Unexpected result from external API")
|
EXTERNAL_API_ERROR = errors.New("Unexpected result from external API")
|
||||||
ErrorIsTombstone = errors.New("tweet is a tombstone")
|
ErrorIsTombstone = errors.New("tweet is a tombstone")
|
||||||
ErrRateLimited = errors.New("rate limited")
|
ErrRateLimited = errors.New("rate limited")
|
||||||
ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
|
ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
|
||||||
|
ErrMediaDownload404 = errors.New("media download HTTP 404")
|
||||||
|
|
||||||
// These are not API errors, but network errors generally
|
// These are not API errors, but network errors generally
|
||||||
ErrNoInternet = errors.New("no internet connection")
|
ErrNoInternet = errors.New("no internet connection")
|
||||||
|
@ -452,6 +452,11 @@ func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
|
|||||||
// Not a DMCA; fall through
|
// Not a DMCA; fall through
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode == 404 {
|
||||||
|
log.Debugf("Media download 404 (%s)", remote_url)
|
||||||
|
return body, ErrMediaDownload404
|
||||||
|
}
|
||||||
|
|
||||||
if resp.StatusCode != 200 {
|
if resp.StatusCode != 200 {
|
||||||
url, err := url.Parse(remote_url)
|
url, err := url.Parse(remote_url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -2,10 +2,10 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/url"
|
"net/url"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
"log"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Url struct {
|
type Url struct {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user