Handle HTTP request timeouts

This commit is contained in:
Alessio 2024-03-16 19:55:05 -07:00
parent 51ae1d09a1
commit 69e0a35e57
4 changed files with 39 additions and 13 deletions

View File

@ -53,7 +53,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) {
// DUPE: download-image // DUPE: download-image
outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
err = downloader.Curl(img.RemoteURL, outfile) err = downloader.Curl(img.RemoteURL, outfile)
if err != nil { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download image %q: %s\n", img.RemoteURL, err.Error())
} else if err != nil {
panic(fmt.Errorf("downloading image %q on DM message %d:\n %w", img.RemoteURL, m.ID, err)) panic(fmt.Errorf("downloading image %q on DM message %d:\n %w", img.RemoteURL, m.ID, err))
} }
_, err = p.DB.NamedExec(`update chat_message_images set is_downloaded = 1 where id = :id`, img) _, err = p.DB.NamedExec(`update chat_message_images set is_downloaded = 1 where id = :id`, img)
@ -84,7 +87,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) {
outfile := path.Join(p.ProfileDir, "videos", vid.LocalFilename) outfile := path.Join(p.ProfileDir, "videos", vid.LocalFilename)
err = downloader.Curl(vid.RemoteURL, outfile) err = downloader.Curl(vid.RemoteURL, outfile)
if errors.Is(err, ErrorDMCA) { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download video %q: %s\n", vid.RemoteURL, err.Error())
} else if errors.Is(err, ErrorDMCA) {
vid.IsDownloaded = false vid.IsDownloaded = false
vid.IsBlockedByDMCA = true vid.IsBlockedByDMCA = true
} else if err != nil { } else if err != nil {
@ -96,7 +102,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) {
// Download the thumbnail // Download the thumbnail
outfile = path.Join(p.ProfileDir, "video_thumbnails", vid.ThumbnailLocalPath) outfile = path.Join(p.ProfileDir, "video_thumbnails", vid.ThumbnailLocalPath)
err = downloader.Curl(vid.ThumbnailRemoteUrl, outfile) err = downloader.Curl(vid.ThumbnailRemoteUrl, outfile)
if err != nil { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download video thumbnail %q: %s\n", vid.ThumbnailRemoteUrl, err.Error())
} else if err != nil {
panic(fmt.Errorf("Error downloading video thumbnail (DMMessageID %d):\n %w", vid.DMMessageID, err)) panic(fmt.Errorf("Error downloading video thumbnail (DMMessageID %d):\n %w", vid.DMMessageID, err))
} }
@ -114,7 +123,10 @@ func (p Profile) SaveDMTrove(trove DMTrove, should_download bool) {
if url.HasCard && url.HasThumbnail { if url.HasCard && url.HasThumbnail {
outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
if err != nil { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download link thumbnail %q: %s\n", url.ThumbnailRemoteUrl, err.Error())
} else if err != nil {
panic(fmt.Errorf("downloading link thumbnail %q on DM message %d:\n %w", url.ThumbnailRemoteUrl, m.ID, err)) panic(fmt.Errorf("downloading link thumbnail %q on DM message %d:\n %w", url.ThumbnailRemoteUrl, m.ID, err))
} }
} }

View File

@ -2,6 +2,7 @@ package persistence
import ( import (
"fmt" "fmt"
"errors"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
) )
@ -40,7 +41,10 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
if should_download { if should_download {
// Download their tiny profile image // Download their tiny profile image
err = p.DownloadUserProfileImageTiny(&u) err = p.DownloadUserProfileImageTiny(&u)
if err != nil { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error())
} else if err != nil {
panic(fmt.Errorf("Error downloading user content for user with ID %d and handle %s:\n %w", u.ID, u.Handle, err)) panic(fmt.Errorf("Error downloading user content for user with ID %d and handle %s:\n %w", u.ID, u.Handle, err))
} }
} }
@ -61,7 +65,10 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
if should_download { if should_download {
err = p.DownloadTweetContentFor(&t) err = p.DownloadTweetContentFor(&t)
if err != nil { if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
} else if err != nil {
panic(fmt.Errorf("Error downloading tweet content for tweet ID %d:\n %w", t.ID, err)) panic(fmt.Errorf("Error downloading tweet content for tweet ID %d:\n %w", t.ID, err))
} }
} }

View File

@ -11,4 +11,5 @@ var (
ErrorIsTombstone = errors.New("tweet is a tombstone") ErrorIsTombstone = errors.New("tweet is a tombstone")
ErrRateLimited = errors.New("rate limited") ErrRateLimited = errors.New("rate limited")
ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)") ErrorDMCA = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)")
ErrRequestTimeout = errors.New("request timed out")
) )

View File

@ -144,8 +144,8 @@ func (api *API) update_csrf_token() {
} }
} }
func (api *API) do_http_POST(url string, body string, result interface{}) error { func (api *API) do_http_POST(remote_url string, body string, result interface{}) error {
req, err := http.NewRequest("POST", url, strings.NewReader(body)) req, err := http.NewRequest("POST", remote_url, strings.NewReader(body))
if err != nil { if err != nil {
return fmt.Errorf("Error initializing HTTP POST request:\n %w", err) return fmt.Errorf("Error initializing HTTP POST request:\n %w", err)
} }
@ -161,7 +161,9 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error
log.Debug(" " + body) log.Debug(" " + body)
resp, err := api.Client.Do(req) resp, err := api.Client.Do(req)
if err != nil { if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() {
return fmt.Errorf("POST %q:\n %w", remote_url, ErrRequestTimeout)
} else if err != nil {
return fmt.Errorf("Error executing HTTP POST request:\n %w", err) return fmt.Errorf("Error executing HTTP POST request:\n %w", err)
} }
api.update_csrf_token() api.update_csrf_token()
@ -190,8 +192,8 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error
return nil return nil
} }
func (api *API) do_http(url string, cursor string, result interface{}) error { func (api *API) do_http(remote_url string, cursor string, result interface{}) error {
req, err := http.NewRequest("GET", url, nil) req, err := http.NewRequest("GET", remote_url, nil)
if err != nil { if err != nil {
return fmt.Errorf("Error initializing HTTP GET request:\n %w", err) return fmt.Errorf("Error initializing HTTP GET request:\n %w", err)
} }
@ -210,7 +212,9 @@ func (api *API) do_http(url string, cursor string, result interface{}) error {
} }
resp, err := api.Client.Do(req) resp, err := api.Client.Do(req)
if err != nil { if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() {
return fmt.Errorf("GET %q:\n %w", remote_url, ErrRequestTimeout)
} else if err != nil {
return fmt.Errorf("Error executing HTTP request:\n %w", err) return fmt.Errorf("Error executing HTTP request:\n %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
@ -386,7 +390,9 @@ func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
// req.Header.Set("Referer", "https://twitter.com/") // DM embedded images require this header // req.Header.Set("Referer", "https://twitter.com/") // DM embedded images require this header
resp, err := api.Client.Do(req) resp, err := api.Client.Do(req)
if err != nil { if urlErr, ok := err.(*url.Error); ok && urlErr.Timeout() {
return []byte{}, fmt.Errorf("GET %q:\n %w", remote_url, ErrRequestTimeout)
} else if err != nil {
return []byte{}, fmt.Errorf("Error executing HTTP request:\n %w", err) return []byte{}, fmt.Errorf("Error executing HTTP request:\n %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()