Scraper: handle geo-blocked videos, which were causing the downloader to panic
This commit is contained in:
parent
c19d36d053
commit
b076f4d2f2
@ -27,7 +27,7 @@ var ErrorDMCA error = errors.New("video is DMCAed, unable to download (HTTP 403
|
|||||||
// - url: the remote file to download
|
// - url: the remote file to download
|
||||||
// - outpath: the path on disk to save it to
|
// - outpath: the path on disk to save it to
|
||||||
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
||||||
println(url)
|
fmt.Println(url)
|
||||||
resp, err := http.Get(url)
|
resp, err := http.Get(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error executing HTTP GET(%q):\n %w", url, err)
|
return fmt.Errorf("Error executing HTTP GET(%q):\n %w", url, err)
|
||||||
@ -150,6 +150,11 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := range t.Videos {
|
for i := range t.Videos {
|
||||||
|
// Videos can be geoblocked, and the HTTP response for them isn't JSON, so the failure is hard to detect at download time; skip them up front
|
||||||
|
if t.Videos[i].IsGeoblocked {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
err := p.download_tweet_video(&t.Videos[i], downloader)
|
err := p.download_tweet_video(&t.Videos[i], downloader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -38,6 +38,10 @@ type APIExtendedMedia struct {
|
|||||||
Variants SortableVariants `json:"variants"`
|
Variants SortableVariants `json:"variants"`
|
||||||
Duration int `json:"duration_millis"`
|
Duration int `json:"duration_millis"`
|
||||||
} `json:"video_info"`
|
} `json:"video_info"`
|
||||||
|
ExtMediaAvailability struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
} `json:"ext_media_availability"`
|
||||||
OriginalInfo struct {
|
OriginalInfo struct {
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
Height int `json:"height"`
|
Height int `json:"height"`
|
||||||
|
@ -26,6 +26,7 @@ type Video struct {
|
|||||||
|
|
||||||
IsDownloaded bool `db:"is_downloaded"`
|
IsDownloaded bool `db:"is_downloaded"`
|
||||||
IsBlockedByDMCA bool `db:"is_blocked_by_dmca"`
|
IsBlockedByDMCA bool `db:"is_blocked_by_dmca"`
|
||||||
|
IsGeoblocked bool `db:"is_geoblocked"`
|
||||||
IsGif bool `db:"is_gif"`
|
IsGif bool `db:"is_gif"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -78,6 +79,7 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
|||||||
|
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
IsBlockedByDMCA: false,
|
IsBlockedByDMCA: false,
|
||||||
|
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
||||||
IsGif: apiVideo.Type == "animated_gif",
|
IsGif: apiVideo.Type == "animated_gif",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,13 +13,13 @@ import (
|
|||||||
|
|
||||||
func TestParseAPIVideo(t *testing.T) {
|
func TestParseAPIVideo(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||||
if err != nil {
|
require.NoError(err)
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apivideo APIExtendedMedia
|
var apivideo APIExtendedMedia
|
||||||
err = json.Unmarshal(data, &apivideo)
|
err = json.Unmarshal(data, &apivideo)
|
||||||
require.NoError(t, err)
|
require.NoError(err)
|
||||||
|
|
||||||
tweet_id := TweetID(28)
|
tweet_id := TweetID(28)
|
||||||
video := ParseAPIVideo(apivideo, tweet_id)
|
video := ParseAPIVideo(apivideo, tweet_id)
|
||||||
@ -35,3 +35,18 @@ func TestParseAPIVideo(t *testing.T) {
|
|||||||
assert.Equal(88300, video.Duration)
|
assert.Equal(88300, video.Duration)
|
||||||
assert.False(video.IsDownloaded)
|
assert.False(video.IsDownloaded)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseGeoblockedVideo(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
var apivideo APIExtendedMedia
|
||||||
|
err = json.Unmarshal(data, &apivideo)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
tweet_id := TweetID(28)
|
||||||
|
video := ParseAPIVideo(apivideo, tweet_id)
|
||||||
|
assert.True(video.IsGeoblocked)
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user