Remove scraper singleton pattern entirely

This commit is contained in:
Alessio 2024-08-19 17:27:54 -07:00
parent 08e5a726a8
commit e2ca9a975a
11 changed files with 59 additions and 75 deletions

View File

@ -129,15 +129,12 @@ func main() {
*session_name = (*session_name)[:len(*session_name)-8]
}
api = profile.LoadSession(scraper.UserHandle(*session_name))
scraper.InitApi(api)
} else {
var err error
api, err = scraper.NewGuestSession()
if err != nil {
log.Warnf("Unable to initialize guest session! Might be a network issue")
} else {
scraper.InitApi(api)
}
} // Don't exit here, some operations don't require a connection
}
switch operation {
@ -335,7 +332,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
if is_scrape_failure(err) {
die(err.Error(), false, -1)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
}
@ -356,7 +353,7 @@ func fetch_user_feed(handle string, how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
@ -374,7 +371,7 @@ func get_user_likes(handle string, how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
@ -392,7 +389,7 @@ func get_followees(handle string, how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
profile.SaveAsFolloweesList(user.ID, trove)
happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err)
@ -406,7 +403,7 @@ func get_followers(handle string, how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
profile.SaveAsFollowersList(user.ID, trove)
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err)
@ -416,7 +413,7 @@ func get_bookmarks(how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(fmt.Sprintf(
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
@ -429,7 +426,7 @@ func fetch_timeline(is_following_only bool) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
@ -447,7 +444,7 @@ func download_tweet_content(tweet_identifier string) {
if err != nil {
panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err))
}
err = profile.DownloadTweetContentFor(&tweet)
err = profile.DownloadTweetContentFor(&tweet, &api)
if err != nil {
panic("Error getting content: " + err.Error())
}
@ -458,7 +455,7 @@ func download_user_content(handle scraper.UserHandle) {
if err != nil {
panic("Couldn't get the user from database: " + err.Error())
}
err = profile.DownloadUserContentFor(&user)
err = profile.DownloadUserContentFor(&user, &api)
if err != nil {
panic("Error getting content: " + err.Error())
}
@ -469,7 +466,7 @@ func search(query string, how_many int) {
if is_scrape_failure(err) {
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
}
@ -532,7 +529,7 @@ func fetch_inbox(how_many int) {
if err != nil {
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
}
@ -546,7 +543,7 @@ func fetch_dm(id string, how_many int) {
if err != nil {
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
err,
@ -563,7 +560,7 @@ func send_dm(room_id string, text string, in_reply_to_id int) {
if err != nil {
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
}
profile.SaveTweetTrove(trove, true)
profile.SaveTweetTrove(trove, true, &api)
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
}

View File

@ -26,8 +26,8 @@ func (app *Application) Bookmarks(w http.ResponseWriter, r *http.Request) {
panic(err) // Return a toast
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle)

View File

@ -75,7 +75,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
return
}
panic_if(app.Profile.SaveUser(&user))
panic_if(app.Profile.DownloadUserContentFor(&user))
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
// Now that the user is scraped for sure, set them as the logged-in user
err = app.SetActiveUser(api.UserHandle)
@ -88,8 +88,8 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
http.Redirect(w, r, "/", 303)
}
fmt.Println("Saving initial feed results...")
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
// Scrape the user's followers
trove, err = app.API.GetFollowees(user.ID, 1000)
@ -97,9 +97,9 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error())
http.Redirect(w, r, "/", 303)
}
app.Profile.SaveTweetTrove(trove, false)
app.Profile.SaveTweetTrove(trove, false, &app.API)
app.Profile.SaveAsFolloweesList(user.ID, trove)
go app.Profile.SaveTweetTrove(trove, true)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
// Redirect to Timeline
http.Redirect(w, r, "/", 303)

View File

@ -70,8 +70,8 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) {
if err != nil {
panic(err)
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
@ -122,8 +122,8 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
if err != nil {
panic(err)
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background
}
// `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it

View File

@ -108,8 +108,8 @@ func (app *Application) Search(w http.ResponseWriter, r *http.Request) {
app.ErrorLog.Print(err)
// TODO: show error in UI
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
c, err := persistence.NewCursorFromSearchQuery(search_text)

View File

@ -53,8 +53,8 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
// Save the trove unless there was an unrecoverable error
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background
_, is_available = trove.Tweets[id]
}

View File

@ -25,7 +25,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
return
}
panic_if(app.Profile.SaveUser(&user))
panic_if(app.Profile.DownloadUserContentFor(&user))
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
}
if len(parts) > 1 && parts[1] == "followers" {
@ -51,16 +51,16 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
app.ErrorLog.Print(err)
// TODO: show error in UI
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
} else if len(parts) == 2 && parts[1] == "likes" {
trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable
if err != nil {
app.ErrorLog.Print(err)
// TODO: show error in UI
}
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
}
@ -163,9 +163,9 @@ func (app *Application) UserFollowees(w http.ResponseWriter, r *http.Request, us
app.ErrorLog.Print(err)
// TODO: show error in UI
}
app.Profile.SaveTweetTrove(trove, false)
app.Profile.SaveTweetTrove(trove, false, &app.API)
app.Profile.SaveAsFolloweesList(user.ID, trove)
go app.Profile.SaveTweetTrove(trove, true)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID))
@ -189,9 +189,9 @@ func (app *Application) UserFollowers(w http.ResponseWriter, r *http.Request, us
app.ErrorLog.Print(err)
// TODO: show error in UI
}
app.Profile.SaveTweetTrove(trove, false)
app.Profile.SaveTweetTrove(trove, false, &app.API)
app.Profile.SaveAsFollowersList(user.ID, trove)
go app.Profile.SaveTweetTrove(trove, true)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
}
data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID))

View File

@ -37,8 +37,8 @@ func (app *Application) background_scrape() {
return
}
fmt.Println("Saving scrape results...")
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
fmt.Println("Scraping succeeded.")
is_following_only = false
}
@ -72,8 +72,8 @@ func (app *Application) background_user_likes_scrape() {
return
}
fmt.Println("Saving scrape results...")
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
fmt.Println("Scraping succeeded.")
}
@ -113,8 +113,8 @@ func (app *Application) background_dm_polling_scrape() {
panic(err)
}
fmt.Println("Saving DM results...")
app.Profile.SaveTweetTrove(trove, false)
go app.Profile.SaveTweetTrove(trove, true)
app.Profile.SaveTweetTrove(trove, false, &app.API)
go app.Profile.SaveTweetTrove(trove, true, &app.API)
fmt.Println("Scraping DMs succeeded.")
}

View File

@ -13,7 +13,9 @@ type MediaDownloader interface {
Curl(url string, outpath string) error
}
type DefaultDownloader struct{}
type DefaultDownloader struct {
*scraper.API
}
// Download a file over HTTP and save it.
//
@ -21,7 +23,7 @@ type DefaultDownloader struct{}
// - url: the remote file to download
// - outpath: the path on disk to save it to
func (d DefaultDownloader) Curl(url string, outpath string) error {
data, err := scraper.DownloadMedia(url)
data, err := d.API.DownloadMedia(url)
if err != nil {
return fmt.Errorf("downloading %q:\n %w", url, err)
}
@ -98,8 +100,8 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl
// Download a tweet's video and picture content.
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
func (p Profile) DownloadTweetContentFor(t *scraper.Tweet) error {
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{})
func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error {
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
}
// Enable injecting a custom MediaDownloader (i.e., for testing)
@ -139,8 +141,8 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
}
// Download a user's banner and profile images
func (p Profile) DownloadUserContentFor(u *scraper.User) error {
return p.DownloadUserContentWithInjector(u, DefaultDownloader{})
func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error {
return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
}
// Enable injecting a custom MediaDownloader (i.e., for testing)
@ -184,12 +186,12 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
// Download a User's tiny profile image, if it hasn't been downloaded yet.
// If it has been downloaded, do nothing.
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
func (p Profile) DownloadUserProfileImageTiny(u *scraper.User) error {
func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error {
if p.IsFollowing(*u) {
return p.DownloadUserContentFor(u)
return p.DownloadUserContentFor(u, api)
}
d := DefaultDownloader{}
d := DefaultDownloader{API: api}
outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath())
if file_exists(outfile) {

View File

@ -10,7 +10,7 @@ import (
// Convenience function that saves all the objects in a TweetTrove.
// Panics if anything goes wrong.
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API) {
for i, u := range trove.Users {
err := p.SaveUser(&u)
if err != nil {
@ -41,7 +41,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
if should_download {
// Download their tiny profile image
err = p.DownloadUserProfileImageTiny(&u)
err = p.DownloadUserProfileImageTiny(&u, api)
if errors.Is(err, ErrRequestTimeout) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error())
@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
}
if should_download {
err = p.DownloadTweetContentFor(&t)
err = p.DownloadTweetContentFor(&t, api)
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
// Forget about it; if it's important someone will try again
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
@ -120,7 +120,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
// Download content if needed
if should_download {
downloader := DefaultDownloader{}
downloader := DefaultDownloader{API: api}
for _, img := range m.Images {
// Check if it's already downloaded

View File

@ -24,17 +24,6 @@ type API struct {
CSRFToken string
}
// Use a global API variable since it is needed in so many utility functions (e.g.,
// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having
// to inject it everywhere.
//
// Should be set by the caller (main program) depending on the session file used.
var the_api API
// Initializer for the global api variable
func InitApi(newApi API) {
the_api = newApi
}
type api_outstruct struct {
Cookies []*http.Cookie
@ -327,10 +316,6 @@ func add_tweet_query_params(query *url.Values) {
query.Add("count", "20")
}
func DownloadMedia(url string) ([]byte, error) {
return the_api.DownloadMedia(url)
}
func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
fmt.Printf("Downloading: %s\n", remote_url)
req, err := http.NewRequest("GET", remote_url, nil)