Scraper: handle videos being geo-blocked, which was panicking the downloader
This commit is contained in:
parent
c19d36d053
commit
b076f4d2f2
@ -27,7 +27,7 @@ var ErrorDMCA error = errors.New("video is DMCAed, unable to download (HTTP 403
|
||||
// - url: the remote file to download
|
||||
// - outpath: the path on disk to save it to
|
||||
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
||||
println(url)
|
||||
fmt.Println(url)
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error executing HTTP GET(%q):\n %w", url, err)
|
||||
@ -150,6 +150,11 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
|
||||
}
|
||||
|
||||
for i := range t.Videos {
|
||||
// Videos can be geoblocked, and the HTTP response isn't JSON, so that case is hard to detect from the response; skip such videos
|
||||
if t.Videos[i].IsGeoblocked {
|
||||
continue
|
||||
}
|
||||
|
||||
err := p.download_tweet_video(&t.Videos[i], downloader)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -38,6 +38,10 @@ type APIExtendedMedia struct {
|
||||
Variants SortableVariants `json:"variants"`
|
||||
Duration int `json:"duration_millis"`
|
||||
} `json:"video_info"`
|
||||
ExtMediaAvailability struct {
|
||||
Status string `json:"status"`
|
||||
Reason string `json:"reason"`
|
||||
} `json:"ext_media_availability"`
|
||||
OriginalInfo struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
|
@ -26,6 +26,7 @@ type Video struct {
|
||||
|
||||
IsDownloaded bool `db:"is_downloaded"`
|
||||
IsBlockedByDMCA bool `db:"is_blocked_by_dmca"`
|
||||
IsGeoblocked bool `db:"is_geoblocked"`
|
||||
IsGif bool `db:"is_gif"`
|
||||
}
|
||||
|
||||
@ -78,6 +79,7 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||
|
||||
IsDownloaded: false,
|
||||
IsBlockedByDMCA: false,
|
||||
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
||||
IsGif: apiVideo.Type == "animated_gif",
|
||||
}
|
||||
}
|
||||
|
@ -13,13 +13,13 @@ import (
|
||||
|
||||
func TestParseAPIVideo(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
require.NoError(err)
|
||||
|
||||
var apivideo APIExtendedMedia
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(t, err)
|
||||
require.NoError(err)
|
||||
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
@ -35,3 +35,18 @@ func TestParseAPIVideo(t *testing.T) {
|
||||
assert.Equal(88300, video.Duration)
|
||||
assert.False(video.IsDownloaded)
|
||||
}
|
||||
|
||||
// TestParseGeoblockedVideo loads the geoblocked-video fixture, unmarshals it into an
// APIExtendedMedia, and checks that ParseAPIVideo marks the resulting Video as geoblocked.
func TestParseGeoblockedVideo(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
|
||||
require.NoError(err)
|
||||
|
||||
var apivideo APIExtendedMedia
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(err)
|
||||
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
assert.True(video.IsGeoblocked)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user