Remove scraper singleton pattern entirely
This commit is contained in:
parent
08e5a726a8
commit
e2ca9a975a
@ -129,15 +129,12 @@ func main() {
|
||||
*session_name = (*session_name)[:len(*session_name)-8]
|
||||
}
|
||||
api = profile.LoadSession(scraper.UserHandle(*session_name))
|
||||
scraper.InitApi(api)
|
||||
} else {
|
||||
var err error
|
||||
api, err = scraper.NewGuestSession()
|
||||
if err != nil {
|
||||
log.Warnf("Unable to initialize guest session! Might be a network issue")
|
||||
} else {
|
||||
scraper.InitApi(api)
|
||||
}
|
||||
} // Don't exit here, some operations don't require a connection
|
||||
}
|
||||
|
||||
switch operation {
|
||||
@ -335,7 +332,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||
}
|
||||
@ -356,7 +353,7 @@ func fetch_user_feed(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -374,7 +371,7 @@ func get_user_likes(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -392,7 +389,7 @@ func get_followees(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
profile.SaveAsFolloweesList(user.ID, trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err)
|
||||
@ -406,7 +403,7 @@ func get_followers(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
profile.SaveAsFollowersList(user.ID, trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err)
|
||||
@ -416,7 +413,7 @@ func get_bookmarks(how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(fmt.Sprintf(
|
||||
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
|
||||
@ -429,7 +426,7 @@ func fetch_timeline(is_following_only bool) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -447,7 +444,7 @@ func download_tweet_content(tweet_identifier string) {
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Couldn't get tweet (ID %d) from database:\n %w", tweet_id, err))
|
||||
}
|
||||
err = profile.DownloadTweetContentFor(&tweet)
|
||||
err = profile.DownloadTweetContentFor(&tweet, &api)
|
||||
if err != nil {
|
||||
panic("Error getting content: " + err.Error())
|
||||
}
|
||||
@ -458,7 +455,7 @@ func download_user_content(handle scraper.UserHandle) {
|
||||
if err != nil {
|
||||
panic("Couldn't get the user from database: " + err.Error())
|
||||
}
|
||||
err = profile.DownloadUserContentFor(&user)
|
||||
err = profile.DownloadUserContentFor(&user, &api)
|
||||
if err != nil {
|
||||
panic("Error getting content: " + err.Error())
|
||||
}
|
||||
@ -469,7 +466,7 @@ func search(query string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||
}
|
||||
@ -532,7 +529,7 @@ func fetch_inbox(how_many int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||
}
|
||||
|
||||
@ -546,7 +543,7 @@ func fetch_dm(id string, how_many int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
|
||||
err,
|
||||
@ -563,7 +560,7 @@ func send_dm(room_id string, text string, in_reply_to_id int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true)
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||
}
|
||||
|
||||
|
@ -26,8 +26,8 @@ func (app *Application) Bookmarks(w http.ResponseWriter, r *http.Request) {
|
||||
panic(err) // Return a toast
|
||||
}
|
||||
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
|
||||
c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle)
|
||||
|
@ -75,7 +75,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
||||
return
|
||||
}
|
||||
panic_if(app.Profile.SaveUser(&user))
|
||||
panic_if(app.Profile.DownloadUserContentFor(&user))
|
||||
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
|
||||
|
||||
// Now that the user is scraped for sure, set them as the logged-in user
|
||||
err = app.SetActiveUser(api.UserHandle)
|
||||
@ -88,8 +88,8 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
||||
http.Redirect(w, r, "/", 303)
|
||||
}
|
||||
fmt.Println("Saving initial feed results...")
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
|
||||
// Scrape the user's followees
|
||||
trove, err = app.API.GetFollowees(user.ID, 1000)
|
||||
@ -97,9 +97,9 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
||||
app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error())
|
||||
http.Redirect(w, r, "/", 303)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
|
||||
// Redirect to Timeline
|
||||
http.Redirect(w, r, "/", 303)
|
||||
|
@ -70,8 +70,8 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
|
||||
func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
||||
@ -122,8 +122,8 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background
|
||||
}
|
||||
|
||||
// `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it
|
||||
|
@ -108,8 +108,8 @@ func (app *Application) Search(w http.ResponseWriter, r *http.Request) {
|
||||
app.ErrorLog.Print(err)
|
||||
// TODO: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery(search_text)
|
||||
|
@ -53,8 +53,8 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
|
||||
|
||||
// Save the trove unless there was an unrecoverable error
|
||||
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API) // Download the content in the background
|
||||
_, is_available = trove.Tweets[id]
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
panic_if(app.Profile.SaveUser(&user))
|
||||
panic_if(app.Profile.DownloadUserContentFor(&user))
|
||||
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
|
||||
}
|
||||
|
||||
if len(parts) > 1 && parts[1] == "followers" {
|
||||
@ -51,16 +51,16 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
||||
app.ErrorLog.Print(err)
|
||||
// TODO: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
} else if len(parts) == 2 && parts[1] == "likes" {
|
||||
trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable
|
||||
if err != nil {
|
||||
app.ErrorLog.Print(err)
|
||||
// TODO: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,9 +163,9 @@ func (app *Application) UserFollowees(w http.ResponseWriter, r *http.Request, us
|
||||
app.ErrorLog.Print(err)
|
||||
// TODO: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
|
||||
data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID))
|
||||
@ -189,9 +189,9 @@ func (app *Application) UserFollowers(w http.ResponseWriter, r *http.Request, us
|
||||
app.ErrorLog.Print(err)
|
||||
// TODO: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
app.Profile.SaveAsFollowersList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
}
|
||||
|
||||
data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID))
|
||||
|
@ -37,8 +37,8 @@ func (app *Application) background_scrape() {
|
||||
return
|
||||
}
|
||||
fmt.Println("Saving scrape results...")
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
fmt.Println("Scraping succeeded.")
|
||||
is_following_only = false
|
||||
}
|
||||
@ -72,8 +72,8 @@ func (app *Application) background_user_likes_scrape() {
|
||||
return
|
||||
}
|
||||
fmt.Println("Saving scrape results...")
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
fmt.Println("Scraping succeeded.")
|
||||
}
|
||||
|
||||
@ -113,8 +113,8 @@ func (app *Application) background_dm_polling_scrape() {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println("Saving DM results...")
|
||||
app.Profile.SaveTweetTrove(trove, false)
|
||||
go app.Profile.SaveTweetTrove(trove, true)
|
||||
app.Profile.SaveTweetTrove(trove, false, &app.API)
|
||||
go app.Profile.SaveTweetTrove(trove, true, &app.API)
|
||||
fmt.Println("Scraping DMs succeeded.")
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,9 @@ type MediaDownloader interface {
|
||||
Curl(url string, outpath string) error
|
||||
}
|
||||
|
||||
type DefaultDownloader struct{}
|
||||
type DefaultDownloader struct {
|
||||
*scraper.API
|
||||
}
|
||||
|
||||
// Download a file over HTTP and save it.
|
||||
//
|
||||
@ -21,7 +23,7 @@ type DefaultDownloader struct{}
|
||||
// - url: the remote file to download
|
||||
// - outpath: the path on disk to save it to
|
||||
func (d DefaultDownloader) Curl(url string, outpath string) error {
|
||||
data, err := scraper.DownloadMedia(url)
|
||||
data, err := d.API.DownloadMedia(url)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading %q:\n %w", url, err)
|
||||
}
|
||||
@ -98,8 +100,8 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl
|
||||
|
||||
// Download a tweet's video and picture content.
|
||||
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
|
||||
func (p Profile) DownloadTweetContentFor(t *scraper.Tweet) error {
|
||||
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{})
|
||||
func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error {
|
||||
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
|
||||
}
|
||||
|
||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||
@ -139,8 +141,8 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
|
||||
}
|
||||
|
||||
// Download a user's banner and profile images
|
||||
func (p Profile) DownloadUserContentFor(u *scraper.User) error {
|
||||
return p.DownloadUserContentWithInjector(u, DefaultDownloader{})
|
||||
func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error {
|
||||
return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
|
||||
}
|
||||
|
||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||
@ -184,12 +186,12 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
||||
// Download a User's tiny profile image, if it hasn't been downloaded yet.
|
||||
// If it has been downloaded, do nothing.
|
||||
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
|
||||
func (p Profile) DownloadUserProfileImageTiny(u *scraper.User) error {
|
||||
func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error {
|
||||
if p.IsFollowing(*u) {
|
||||
return p.DownloadUserContentFor(u)
|
||||
return p.DownloadUserContentFor(u, api)
|
||||
}
|
||||
|
||||
d := DefaultDownloader{}
|
||||
d := DefaultDownloader{API: api}
|
||||
|
||||
outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath())
|
||||
if file_exists(outfile) {
|
||||
|
@ -10,7 +10,7 @@ import (
|
||||
|
||||
// Convenience function that saves all the objects in a TweetTrove.
|
||||
// Panics if anything goes wrong.
|
||||
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
||||
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API) {
|
||||
for i, u := range trove.Users {
|
||||
err := p.SaveUser(&u)
|
||||
if err != nil {
|
||||
@ -41,7 +41,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
||||
|
||||
if should_download {
|
||||
// Download their tiny profile image
|
||||
err = p.DownloadUserProfileImageTiny(&u)
|
||||
err = p.DownloadUserProfileImageTiny(&u, api)
|
||||
if errors.Is(err, ErrRequestTimeout) {
|
||||
// Forget about it; if it's important someone will try again
|
||||
fmt.Printf("Failed to @%s's tiny profile image (%q): %s\n", u.Handle, u.ProfileImageUrl, err.Error())
|
||||
@ -65,7 +65,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
||||
}
|
||||
|
||||
if should_download {
|
||||
err = p.DownloadTweetContentFor(&t)
|
||||
err = p.DownloadTweetContentFor(&t, api)
|
||||
if errors.Is(err, ErrRequestTimeout) || errors.Is(err, ErrMediaDownload404) {
|
||||
// Forget about it; if it's important someone will try again
|
||||
fmt.Printf("Failed to download tweet ID %d: %s\n", t.ID, err.Error())
|
||||
@ -120,7 +120,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
||||
|
||||
// Download content if needed
|
||||
if should_download {
|
||||
downloader := DefaultDownloader{}
|
||||
downloader := DefaultDownloader{API: api}
|
||||
|
||||
for _, img := range m.Images {
|
||||
// Check if it's already downloaded
|
||||
|
@ -24,17 +24,6 @@ type API struct {
|
||||
CSRFToken string
|
||||
}
|
||||
|
||||
// Use a global API variable since it is needed in so many utility functions (e.g.,
|
||||
// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having
|
||||
// to inject it everywhere.
|
||||
//
|
||||
// Should be set by the caller (main program) depending on the session file used.
|
||||
var the_api API
|
||||
|
||||
// Initializer for the global api variable
|
||||
func InitApi(newApi API) {
|
||||
the_api = newApi
|
||||
}
|
||||
|
||||
type api_outstruct struct {
|
||||
Cookies []*http.Cookie
|
||||
@ -327,10 +316,6 @@ func add_tweet_query_params(query *url.Values) {
|
||||
query.Add("count", "20")
|
||||
}
|
||||
|
||||
func DownloadMedia(url string) ([]byte, error) {
|
||||
return the_api.DownloadMedia(url)
|
||||
}
|
||||
|
||||
func (api *API) DownloadMedia(remote_url string) ([]byte, error) {
|
||||
fmt.Printf("Downloading: %s\n", remote_url)
|
||||
req, err := http.NewRequest("GET", remote_url, nil)
|
||||
|
Loading…
x
Reference in New Issue
Block a user