diff --git a/cmd/tests.sh b/cmd/tests.sh index 8742d34..122c237 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -77,7 +77,6 @@ test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1 test $(find videos -mindepth 2 | wc -l) = "1" test $(find video_thumbnails -mindepth 2 | wc -l) = "1" - # Try to double-download it tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432 test $(sqlite3 twitter.db "select count(*) from tweets") = "2" @@ -130,11 +129,12 @@ tw download_tweet_content https://twitter.com/mexicanwilddog/status/157929228189 test $(find videos -mindepth 2 | wc -l) = "$((initial_videos_count + 1))" test $(find images -mindepth 2 | wc -l) = "$((initial_images_count + 1))" -#Fetch and attempt to download a DMCAed tweet + +# Fetch and attempt to download a DMCAed tweet tw fetch_user TyCardon tw fetch_tweet_only https://twitter.com/TyCardon/status/1480640777281839106 tw download_tweet_content 1480640777281839106 -test $(sqlite3 twitter.db "select is_blocked_by_dmca from videos where tweet_id = 1480640777281839106") = "1" +test $(sqlite3 twitter.db "select is_blocked_by_dmca, is_downloaded from videos where tweet_id = 1480640777281839106") = "1|0" # Fetch a tweet with a poll tw fetch_tweet 1465534109573390348 @@ -302,4 +302,3 @@ test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" # TODO: Maybe this file should be broken up into multiple test scripts echo -e "\033[32mAll tests passed. Finished successfully.\033[0m" - diff --git a/doc/TODO.txt b/doc/TODO.txt index 3e7977a..d9a0320 100644 --- a/doc/TODO.txt +++ b/doc/TODO.txt @@ -1,5 +1,3 @@ -TODO update videos schema to account for download status (success, fail, DCMA, etc). bool -> int? or str? Or add a 'is valid/has been DMCAed' flag? - TODO user-images-extract-to-table: User profile images and banner images => extract to table. Add `is_downloaded` column. 
Add test for double-downloading a user, shouldn't change the write-times on these files @@ -30,7 +28,6 @@ Video size reduction: - option to upgrade or downgrade video quality TODO: Problem tweets -- DONE - https://twitter.com/TyCardon/status/1480640777281839106: "Media has been disabled in response to report by copyright owner" - https://twitter.com/CovfefeAnon/status/1427693931047837702: parent thread not loading (possible GUI problem) - "account no longer exists" tombstone not being collected => https://twitter.com/michaelmalice/status/1461031030278742020 - tweet with warning label not getting scraped right: https://twitter.com/michaelmalice/status/1493324611999748098 diff --git a/go.mod b/go.mod index 409063c..a2381b0 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,5 @@ require ( github.com/mattn/go-sqlite3 v1.14.7 github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.7.0 - golang.org/x/sys v0.2.0 // indirect gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index 1c27f6b..9579380 100644 --- a/go.sum +++ b/go.sum @@ -24,8 +24,6 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/persistence/media_download.go b/persistence/media_download.go index aec0105..e79ff53 100644 --- 
a/persistence/media_download.go +++ b/persistence/media_download.go @@ -19,7 +19,7 @@ type MediaDownloader interface { type DefaultDownloader struct{} -var ErrorDMCA error = errors.New("Error Video is DMCAed, unable to download (HTTP 403 Forbidden)") +var ErrorDMCA error = errors.New("video is DMCAed, unable to download (HTTP 403 Forbidden)") /** * Download a file over HTTP and save it. @@ -40,22 +40,19 @@ func (d DefaultDownloader) Curl(url string, outpath string) error { Error_response string `json:"error_response"` } body, err := io.ReadAll(resp.Body) - fmt.Println("body = " + string(body)) - if err != nil { panic(err) } + fmt.Println(string(body)) err = json.Unmarshal(body, &response) if err != nil { panic(err) } - if response.Error_response == "Dmcaed" { return ErrorDMCA } - - return fmt.Errorf("Error 403 Forbidden %s: %s", url, resp.Status) + // Not a DMCA; fall through } if resp.StatusCode != 200 { diff --git a/scraper/user_test.go b/scraper/user_test.go index 972cd79..ff18f1b 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -131,23 +131,18 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) { assert.Equal(UserHandle("MarkSnyderJr1"), result) } -// Test Get Tiny URLs +// Should compute tiny profile image URLs correctly, and fix local paths if needed (e.g., "_normal" and no file extension) func TestGetTinyURLs(t *testing.T) { assert := assert.New(t) - u := User{ ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg", Handle: "testUser", } - assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg") - assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") - //append in "_normal" to generate new problematic tweet sample + // User with poorly formed profile image URL u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal" - assert.Equal(u.GetTinyProfileImageUrl(), 
"https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal") - assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") }