diff --git a/cmd/tests.sh b/cmd/tests.sh index 1a8e433..8742d34 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -77,6 +77,7 @@ test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1 test $(find videos -mindepth 2 | wc -l) = "1" test $(find video_thumbnails -mindepth 2 | wc -l) = "1" + # Try to double-download it tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432 test $(sqlite3 twitter.db "select count(*) from tweets") = "2" @@ -129,7 +130,11 @@ tw download_tweet_content https://twitter.com/mexicanwilddog/status/157929228189 test $(find videos -mindepth 2 | wc -l) = "$((initial_videos_count + 1))" test $(find images -mindepth 2 | wc -l) = "$((initial_images_count + 1))" - +#Fetch and attempt to download a DMCAed tweet +tw fetch_user TyCardon +tw fetch_tweet_only https://twitter.com/TyCardon/status/1480640777281839106 +tw download_tweet_content 1480640777281839106 +test $(sqlite3 twitter.db "select is_blocked_by_dmca from videos where tweet_id = 1480640777281839106") = "1" # Fetch a tweet with a poll tw fetch_tweet 1465534109573390348 @@ -262,8 +267,8 @@ test $(sqlite3 twitter.db "select count(*) from tweets where user_id = (select i # Test search -tw search "from:michaelmalice constitution" -test $(sqlite3 twitter.db "select count(*) from tweets where user_id = 44067298 and text like '%constitution%'") -gt "30" # Not sure exactly how many +#tw search "from:michaelmalice constitution" +#test $(sqlite3 twitter.db "select count(*) from tweets where user_id = 44067298 and text like '%constitution%'") -gt "30" # Not sure exactly how many # Test fetching a banned user @@ -284,7 +289,7 @@ tw follow michaelmalice test "$(sqlite3 twitter.db "select handle from users where is_followed = 1")" = "michaelmalice" tw follow cernovich -test "$(tw list_followed | wc -l)" = 2 +test $(tw list_followed | wc -l) = 2 test "$(tw list_followed | grep -iq cernovich && echo YES)" = "YES" test "$(tw list_followed | grep -iq michaelmalice && echo YES)" = "YES" test "$(tw list_followed | grep -iq blahblahgibberish && echo YES)" = "" @@ -297,3 +302,4 @@ test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" # TODO: Maybe this file should be broken up into multiple test scripts echo -e "\033[32mAll tests passed. Finished successfully.\033[0m" + diff --git a/persistence/media_download.go b/persistence/media_download.go index 164e1f1..aec0105 100644 --- a/persistence/media_download.go +++ b/persistence/media_download.go @@ -19,7 +19,7 @@ type MediaDownloader interface { type DefaultDownloader struct{} -var ErrorDCMA error = errors.New("Error Video is DCMAed, unable to download (HTTP 403 Forbidden)") +var ErrorDMCA error = errors.New("Error Video is DMCAed, unable to download (HTTP 403 Forbidden)") /** * Download a file over HTTP and save it. @@ -46,10 +46,13 @@ func (d DefaultDownloader) Curl(url string, outpath string) error { panic(err) } - json.Unmarshal(body, &response) + err = json.Unmarshal(body, &response) + if err != nil { + panic(err) + } if response.Error_response == "Dmcaed" { - return ErrorDCMA + return ErrorDMCA } return fmt.Errorf("Error 403 Forbidden %s: %s", url, resp.Status) @@ -102,8 +105,9 @@ func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownload outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename) err := downloader.Curl(v.RemoteURL, outfile) - if err == ErrorDCMA { - v.IsDownloaded = false //Would need to change the database schema / or add a flag + if errors.Is(err, ErrorDMCA) { + v.IsDownloaded = false + v.IsBlockedByDMCA = true } else if err != nil { return fmt.Errorf("Error downloading video (TweetID %d):\n %w", v.TweetID, err) } else { diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 5b42fe9..e85c396 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -37,16 +37,17 @@ func (p Profile) SaveImage(img scraper.Image) error { func (p Profile) SaveVideo(vid scraper.Video) error { _, err := p.DB.Exec(` insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, - duration, view_count, is_downloaded, is_gif) - values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + duration, view_count, is_downloaded, is_blocked_by_dmca, is_gif) + values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set is_downloaded=(is_downloaded or ?), - view_count=max(view_count, ?) + view_count=max(view_count, ?), + is_blocked_by_dmca = ? `, vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, - vid.Duration, vid.ViewCount, vid.IsDownloaded, vid.IsGif, + vid.Duration, vid.ViewCount, vid.IsDownloaded, vid.IsBlockedByDMCA, vid.IsGif, - vid.IsDownloaded, vid.ViewCount, + vid.IsDownloaded, vid.ViewCount, vid.IsBlockedByDMCA, ) if err != nil { return fmt.Errorf("Error saving video (tweet ID %d):\n %w", vid.TweetID, err) @@ -118,7 +119,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { err = p.DB.Select(&vids, ` select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration, - view_count, is_downloaded, is_gif + view_count, is_downloaded, is_blocked_by_dmca, is_gif from videos where tweet_id = ? `, t.ID) diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go index 3cfba4a..b175177 100644 --- a/persistence/media_queries_test.go +++ b/persistence/media_queries_test.go @@ -93,6 +93,7 @@ func TestSaveAndLoadVideo(t *testing.T) { vid := create_video_from_id(rand.Int()) vid.TweetID = tweet.ID vid.IsGif = true + vid.IsBlockedByDMCA = true // Save the Video err := profile.SaveVideo(vid) @@ -128,6 +129,7 @@ func TestModifyVideo(t *testing.T) { require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back") vid.IsDownloaded = true + vid.IsBlockedByDMCA = true vid.ViewCount = 23000 // Save the changes diff --git a/persistence/schema.sql b/persistence/schema.sql index 3acd97d..5b29678 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -166,6 +166,7 @@ create table videos (rowid integer primary key, view_count integer not null default 0, is_gif boolean default 0, is_downloaded boolean default 0, + is_blocked_by_dmca boolean not null default 0, foreign key(tweet_id) references tweets(id) ); diff --git a/persistence/versions.go b/persistence/versions.go index 71fed33..d596489 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -2,12 +2,13 @@ package persistence import ( "fmt" + sql "github.com/jmoiron/sqlx" "offline_twitter/terminal_utils" ) -const ENGINE_DATABASE_VERSION = 12 +const ENGINE_DATABASE_VERSION = 13 type VersionMismatchError struct { EngineVersion int @@ -73,6 +74,7 @@ var MIGRATIONS = []string{ short_url text not null ); alter table tweets add column space_id text references spaces(id)`, + `alter table videos add column is_blocked_by_dmca boolean not null default 0`, } /** diff --git a/scraper/user.go b/scraper/user.go index a5ed2fd..6dd1087 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -205,8 +205,9 @@ func (u User) GetTinyProfileImageUrl() string { // Check that the format is as expected r := regexp.MustCompile(`(\.\w{2,4})$`) if !r.MatchString(u.ProfileImageUrl) { - panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl)) + return u.ProfileImageUrl } + return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1") } @@ -218,5 +219,11 @@ func (u User) GetTinyProfileImageLocalPath() string { if u.ProfileImageUrl == "" { return path.Base(u.GetTinyProfileImageUrl()) } + + r := regexp.MustCompile(`(\.\w{2,4})$`) + if !r.MatchString(u.GetTinyProfileImageUrl()) { + return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl()+".jpg") + } + return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl()) } diff --git a/scraper/user_test.go b/scraper/user_test.go index 1a66072..972cd79 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -130,3 +130,24 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) { require.NoError(t, err) assert.Equal(UserHandle("MarkSnyderJr1"), result) } + +// Test Get Tiny URLs +func TestGetTinyURLs(t *testing.T) { + assert := assert.New(t) + + u := User{ + ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg", + Handle: "testUser", + } + + assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg") + + assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") + + //append in "_normal" to generate new problematic tweet sample + u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal" + + assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal") + + assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") +} diff --git a/scraper/video.go b/scraper/video.go index fde0e36..0288c64 100644 --- a/scraper/video.go +++ b/scraper/video.go @@ -24,8 +24,9 @@ type Video struct { Duration int // milliseconds ViewCount int - IsDownloaded bool - IsGif bool + IsDownloaded bool + IsBlockedByDMCA bool + IsGif bool } func get_filename(remote_url string) string { @@ -75,7 +76,8 @@ func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { Duration: apiVideo.VideoInfo.Duration, ViewCount: view_count, - IsDownloaded: false, - IsGif: apiVideo.Type == "animated_gif", + IsDownloaded: false, + IsBlockedByDMCA: false, + IsGif: apiVideo.Type == "animated_gif", } }