From e2ca9a975a1a267c2021a778f2b8cd1aaa484d8c Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 19 Aug 2024 17:27:54 -0700 Subject: [PATCH] Remove scraper singleton pattern entirely --- cmd/twitter/main.go | 31 ++++++++++------------ internal/webserver/handler_bookmarks.go | 4 +-- internal/webserver/handler_login.go | 10 +++---- internal/webserver/handler_messages.go | 8 +++--- internal/webserver/handler_search.go | 4 +-- internal/webserver/handler_tweet_detail.go | 4 +-- internal/webserver/handler_user_feed.go | 18 ++++++------- internal/webserver/stopwatch.go | 12 ++++----- pkg/persistence/media_download.go | 20 +++++++------- pkg/persistence/tweet_trove_queries.go | 8 +++--- pkg/scraper/api_request_utils.go | 15 ----------- 11 files changed, 59 insertions(+), 75 deletions(-) diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 1bf4847..375c36f 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -129,15 +129,12 @@ func main() { *session_name = (*session_name)[:len(*session_name)-8] } api = profile.LoadSession(scraper.UserHandle(*session_name)) - scraper.InitApi(api) } else { var err error api, err = scraper.NewGuestSession() if err != nil { log.Warnf("Unable to initialize guest session! Might be a network issue") - } else { - scraper.InitApi(api) - } + } // Don't exit here, some operations don't require a connection } switch operation { @@ -335,7 +332,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) { if is_scrape_failure(err) { die(err.Error(), false, -1) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err) } @@ -356,7 +353,7 @@ func fetch_user_feed(handle string, how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit( fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)), @@ -374,7 +371,7 @@ func get_user_likes(handle string, how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit( fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)), @@ -392,7 +389,7 @@ func get_followees(handle string, how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) profile.SaveAsFolloweesList(user.ID, trove) happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err) @@ -406,7 +403,7 @@ func get_followers(handle string, how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) profile.SaveAsFollowersList(user.ID, trove) happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err) @@ -416,7 +413,7 @@ func get_bookmarks(how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit(fmt.Sprintf( "Saved %d tweets, %d retweets, %d users, and %d bookmarks", @@ -429,7 +426,7 @@ func fetch_timeline(is_following_only bool) { if is_scrape_failure(err) { die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit( fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)), @@ -447,7 +444,7 @@ func download_tweet_content(tweet_identifier string) { if err != nil { panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err)) } - err = profile.DownloadTweetContentFor(&tweet) + err = profile.DownloadTweetContentFor(&tweet, &api) if err != nil { panic("Error getting content: " + err.Error()) } @@ -458,7 +455,7 @@ func download_user_content(handle scraper.UserHandle) { if err != nil { panic("Couldn't get the user from database: " + err.Error()) } - err = profile.DownloadUserContentFor(&user) + err = profile.DownloadUserContentFor(&user, &api) if err != nil { panic("Error getting content: " + err.Error()) } @@ -469,7 +466,7 @@ func search(query string, how_many int) { if is_scrape_failure(err) { die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err) } @@ -532,7 +529,7 @@ func fetch_inbox(how_many int) { if err != nil { die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil) } @@ -546,7 +543,7 @@ func fetch_dm(id string, how_many int) { if err != nil { die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit( fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), err, @@ -563,7 +560,7 @@ func send_dm(room_id string, text string, in_reply_to_id int) { if err != nil { die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1) } - profile.SaveTweetTrove(trove, true) + profile.SaveTweetTrove(trove, true, &api) happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil) } diff --git a/internal/webserver/handler_bookmarks.go b/internal/webserver/handler_bookmarks.go index 51c263a..8809411 100644 --- a/internal/webserver/handler_bookmarks.go +++ b/internal/webserver/handler_bookmarks.go @@ -26,8 +26,8 @@ func (app *Application) Bookmarks(w http.ResponseWriter, r *http.Request) { panic(err) // Return a toast } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle) diff --git a/internal/webserver/handler_login.go b/internal/webserver/handler_login.go index c260f5c..04e15a3 100644 --- a/internal/webserver/handler_login.go +++ b/internal/webserver/handler_login.go @@ -75,7 +75,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api return } panic_if(app.Profile.SaveUser(&user)) - panic_if(app.Profile.DownloadUserContentFor(&user)) + panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) // Now that the user is scraped for sure, set them as the logged-in user err = app.SetActiveUser(api.UserHandle) @@ -88,8 +88,8 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api http.Redirect(w, r, "/", 303) } fmt.Println("Saving initial feed results...") - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) // Scrape the user's followers trove, err = app.API.GetFollowees(user.ID, 1000) @@ -97,9 +97,9 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error()) http.Redirect(w, r, "/", 303) } - app.Profile.SaveTweetTrove(trove, false) + app.Profile.SaveTweetTrove(trove, false, &app.API) app.Profile.SaveAsFolloweesList(user.ID, trove) - go app.Profile.SaveTweetTrove(trove, true) + go app.Profile.SaveTweetTrove(trove, true, &app.API) // Redirect to Timeline http.Redirect(w, r, "/", 303) diff --git a/internal/webserver/handler_messages.go b/internal/webserver/handler_messages.go index 64890f6..7689386 100644 --- a/internal/webserver/handler_messages.go +++ b/internal/webserver/handler_messages.go @@ -70,8 +70,8 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) { if err != nil { panic(err) } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) { @@ -122,8 +122,8 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) { if err != nil { panic(err) } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background } // `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it diff --git a/internal/webserver/handler_search.go b/internal/webserver/handler_search.go index b167621..32c9b54 100644 --- a/internal/webserver/handler_search.go +++ b/internal/webserver/handler_search.go @@ -108,8 +108,8 @@ func (app *Application) Search(w http.ResponseWriter, r *http.Request) { app.ErrorLog.Print(err) // TOOD: show error in UI } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } c, err := persistence.NewCursorFromSearchQuery(search_text) diff --git a/internal/webserver/handler_tweet_detail.go b/internal/webserver/handler_tweet_detail.go index 1d739b0..ad3ba1e 100644 --- a/internal/webserver/handler_tweet_detail.go +++ b/internal/webserver/handler_tweet_detail.go @@ -53,8 +53,8 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv // Save the trove unless there was an unrecoverable error if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) { - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background _, is_available = trove.Tweets[id] } diff --git a/internal/webserver/handler_user_feed.go b/internal/webserver/handler_user_feed.go index 3c85aa3..c6fb044 100644 --- a/internal/webserver/handler_user_feed.go +++ b/internal/webserver/handler_user_feed.go @@ -25,7 +25,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) { return } panic_if(app.Profile.SaveUser(&user)) - panic_if(app.Profile.DownloadUserContentFor(&user)) + panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) } if len(parts) > 1 && parts[1] == "followers" { @@ -51,16 +51,16 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) { app.ErrorLog.Print(err) // TOOD: show error in UI } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } else if len(parts) == 2 && parts[1] == "likes" { trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable if err != nil { app.ErrorLog.Print(err) // TOOD: show error in UI } - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } } @@ -163,9 +163,9 @@ func (app *Application) UserFollowees(w http.ResponseWriter, r *http.Request, us app.ErrorLog.Print(err) // TOOD: show error in UI } - app.Profile.SaveTweetTrove(trove, false) + app.Profile.SaveTweetTrove(trove, false, &app.API) app.Profile.SaveAsFolloweesList(user.ID, trove) - go app.Profile.SaveTweetTrove(trove, true) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID)) @@ -189,9 +189,9 @@ func (app *Application) UserFollowers(w http.ResponseWriter, r *http.Request, us app.ErrorLog.Print(err) // TOOD: show error in UI } - app.Profile.SaveTweetTrove(trove, false) + app.Profile.SaveTweetTrove(trove, false, &app.API) app.Profile.SaveAsFollowersList(user.ID, trove) - go app.Profile.SaveTweetTrove(trove, true) + go app.Profile.SaveTweetTrove(trove, true, &app.API) } data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID)) diff --git a/internal/webserver/stopwatch.go b/internal/webserver/stopwatch.go index d0ea02a..30921fb 100644 --- a/internal/webserver/stopwatch.go +++ b/internal/webserver/stopwatch.go @@ -37,8 +37,8 @@ func (app *Application) background_scrape() { return } fmt.Println("Saving scrape results...") - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) fmt.Println("Scraping succeeded.") is_following_only = false } @@ -72,8 +72,8 @@ func (app *Application) background_user_likes_scrape() { return } fmt.Println("Saving scrape results...") - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) fmt.Println("Scraping succeeded.") } @@ -113,8 +113,8 @@ func (app *Application) background_dm_polling_scrape() { panic(err) } fmt.Println("Saving DM results...") - app.Profile.SaveTweetTrove(trove, false) - go app.Profile.SaveTweetTrove(trove, true) + app.Profile.SaveTweetTrove(trove, false, &app.API) + go app.Profile.SaveTweetTrove(trove, true, &app.API) fmt.Println("Scraping DMs succeeded.") } diff --git a/pkg/persistence/media_download.go b/pkg/persistence/media_download.go index 834aa03..bdeec97 100644 --- a/pkg/persistence/media_download.go +++ b/pkg/persistence/media_download.go @@ -13,7 +13,9 @@ type MediaDownloader interface { Curl(url string, outpath string) error } -type DefaultDownloader struct{} +type DefaultDownloader struct { + *scraper.API +} // Download a file over HTTP and save it. // @@ -21,7 +23,7 @@ type DefaultDownloader struct{} // - url: the remote file to download // - outpath: the path on disk to save it to func (d DefaultDownloader) Curl(url string, outpath string) error { - data, err := scraper.DownloadMedia(url) + data, err := d.API.DownloadMedia(url) if err != nil { return fmt.Errorf("downloading %q:\n %w", url, err) } @@ -98,8 +100,8 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl // Download a tweet's video and picture content. // Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader. -func (p Profile) DownloadTweetContentFor(t *scraper.Tweet) error { - return p.DownloadTweetContentWithInjector(t, DefaultDownloader{}) +func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error { + return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api}) } // Enable injecting a custom MediaDownloader (i.e., for testing) @@ -139,8 +141,8 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M } // Download a user's banner and profile images -func (p Profile) DownloadUserContentFor(u *scraper.User) error { - return p.DownloadUserContentWithInjector(u, DefaultDownloader{}) +func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error { + return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api}) } // Enable injecting a custom MediaDownloader (i.e., for testing) @@ -184,12 +186,12 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med // Download a User's tiny profile image, if it hasn't been downloaded yet. // If it has been downloaded, do nothing. // If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method. -func (p Profile) DownloadUserProfileImageTiny(u *scraper.User) error { +func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error { if p.IsFollowing(*u) { - return p.DownloadUserContentFor(u) + return p.DownloadUserContentFor(u, api) } - d := DefaultDownloader{} + d := DefaultDownloader{API: api} outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath()) if file_exists(outfile) { diff --git a/pkg/persistence/tweet_trove_queries.go b/pkg/persistence/tweet_trove_queries.go index 50e090f..0d1bdbc 100644 --- a/pkg/persistence/tweet_trove_queries.go +++ b/pkg/persistence/tweet_trove_queries.go @@ -10,7 +10,7 @@ import ( // Convenience function that saves all the objects in a TweetTrove. // Panics if anything goes wrong. -func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { +func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API) { for i, u := range trove.Users { err := p.SaveUser(&u) if err != nil { @@ -41,7 +41,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { if should_download { // Download their tiny profile image - err = p.DownloadUserProfileImageTiny(&u) + err = p.DownloadUserProfileImageTiny(&u, api) if errors.Is(err, ErrRequestTimeout) { // Forget about it; if it's important someone will try again fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error()) @@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { } if should_download { - err = p.DownloadTweetContentFor(&t) + err = p.DownloadTweetContentFor(&t, api) if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) { // Forget about it; if it's important someone will try again fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error()) @@ -120,7 +120,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) { // Download content if needed if should_download { - downloader := DefaultDownloader{} + downloader := DefaultDownloader{API: api} for _, img := range m.Images { // Check if it's already downloaded diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index e0f0b61..3b559b3 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -24,17 +24,6 @@ type API struct { CSRFToken string } -// Use a global API variable since it is needed in so many utility functions (e.g., -// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having -// to inject it everywhere. -// -// Should be set by the caller (main program) depending on the session file used. -var the_api API - -// Initializer for the global api variable -func InitApi(newApi API) { - the_api = newApi -} type api_outstruct struct { Cookies []*http.Cookie @@ -327,10 +316,6 @@ func add_tweet_query_params(query *url.Values) { query.Add("count", "20") } -func DownloadMedia(url string) ([]byte, error) { - return the_api.DownloadMedia(url) -} - func (api *API) DownloadMedia(remote_url string) ([]byte, error) { fmt.Printf("Downloading: %s\n", remote_url) req, err := http.NewRequest("GET", remote_url, nil)