From c219652dccd3970fc35e46eba8889f91d1d546a6 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 10 Oct 2021 15:13:32 -0700 Subject: [PATCH] Make fetching very aggressive by default --- cmd/tests.sh | 8 +++++--- cmd/twitter/main.go | 23 +++++++++++++++++++---- persistence/media_download.go | 5 +++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/cmd/tests.sh b/cmd/tests.sh index 3404365..2e239fa 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -137,8 +137,9 @@ test $(sqlite3 twitter.db "select is_private from users where handle = 'HbdNrx'" # Test tweets with URLs +tw fetch_user CovfefeAnon urls_count=$(sqlite3 twitter.db "select count(*) from urls") -tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433 +tw fetch_tweet_only https://twitter.com/CovfefeAnon/status/1428904664645394433 urls_count_after=$(sqlite3 twitter.db "select count(*) from urls") test $urls_count_after = $(($urls_count + 1)) test "$(sqlite3 twitter.db "select title from urls where tweet_id = 1428904664645394433")" = "Justice Department investigating Elon Musk's SpaceX following complaint of hiring discrimination" @@ -147,7 +148,7 @@ thumbnail_name=$(sqlite3 twitter.db "select thumbnail_remote_url from urls where test -n "$thumbnail_name" # Not testing for what the thumbnail url is because it keeps changing # Try to double-fetch it; shouldn't duplicate the URL -tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433 +tw fetch_tweet_only https://twitter.com/CovfefeAnon/status/1428904664645394433 urls_count_after_2x=$(sqlite3 twitter.db "select count(*) from urls") test $urls_count_after_2x = $urls_count_after @@ -163,7 +164,8 @@ test -f link_preview_images/${thumbnail_name}_800x320_1.jpg # Test a tweet with a URL but no thumbnail -tw fetch_tweet https://twitter.com/Xirong7/status/1413665734866186243 +tw fetch_user Xirong7 +tw fetch_tweet_only https://twitter.com/Xirong7/status/1413665734866186243 test $(sqlite3 twitter.db "select 
is_content_downloaded from urls where tweet_id = 1413665734866186243") = "0" test $(sqlite3 twitter.db "select has_thumbnail from urls where tweet_id = 1413665734866186243") = "0" initial_link_preview_images_count=$(find link_preview_images | wc -l) # Check that it doesn't change, since there's no thumbnail diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 899351b..e2e2548 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -155,19 +155,26 @@ func fetch_tweet_conversation(tweet_identifier string) { } for _, u := range users { - fmt.Println(u) + // fmt.Println(u) err = profile.SaveUser(u) if err != nil { - die("Error saving tweet: " + err.Error(), false, 4) + die("Error saving user: " + err.Error(), false, 4) + } + err = profile.DownloadUserContentFor(&u) + if err != nil { + die("Error getting user content: " + err.Error(), false, 10) } } for _, t := range tweets { - fmt.Println(t) err = profile.SaveTweet(t) if err != nil { die("Error saving tweet: " + err.Error(), false, 4) } + err = profile.DownloadTweetContentFor(&t) + if err != nil { + die("Error getting tweet content: " + err.Error(), false, 11) + } } fmt.Printf("Saved %d tweets and %d users. 
Exiting successfully\n", len(tweets), len(users)) } @@ -193,7 +200,11 @@ func fetch_user_feed(handle string, how_many int) { for _, u := range users { err = profile.SaveUser(u) if err != nil { - die("Error saving tweet: " + err.Error(), false, 4) + die("Error saving user: " + err.Error(), false, 4) + } + err = profile.DownloadUserContentFor(&u) + if err != nil { + die("Error getting user content: " + err.Error(), false, 10) } } @@ -202,6 +213,10 @@ func fetch_user_feed(handle string, how_many int) { if err != nil { die("Error saving tweet: " + err.Error(), false, 4) } + err = profile.DownloadTweetContentFor(&t) + if err != nil { + die("Error getting tweet content: " + err.Error(), false, 11) + } } for _, r := range retweets { diff --git a/persistence/media_download.go b/persistence/media_download.go index 9eab937..f8cf79d 100644 --- a/persistence/media_download.go +++ b/persistence/media_download.go @@ -6,6 +6,7 @@ import ( "path" "net/http" "io/ioutil" + "strings" "offline_twitter/scraper" ) @@ -150,6 +151,10 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med outfile = path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath) err = downloader.Curl(u.BannerImageUrl, outfile) if err != nil { + if strings.Contains(err.Error(), "404 Not Found") { + // Retry with a "/600x200" suffix appended; some banner URLs return 404 without it (cause unknown). + err = downloader.Curl(u.BannerImageUrl + "/600x200", outfile) + } return err } }