REFACTOR: `SaveTweetTrove` no longer calls `scraper.GetUserByID`, removing another scraper dependency from the `persistence` package
- It was calling it to handle conflicting user handles
- Now user handle conflicts are handled by the caller
- Add a utility function in the `main` and `webserver` packages to rescrape the conflicting users
This commit is contained in:
parent
fc66d1d8a6
commit
4132eb4bca
@ -96,3 +96,30 @@ func is_scrape_failure(err error) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// DUPE: full_save_tweet_trove
|
||||
// full_save_tweet_trove saves `trove` through `profile.SaveTweetTrove` (passing `true` for the
// should-download flag and `api.DownloadMedia` as the download function), then repairs every user
// that call reports as having a conflicting handle: each one is rescraped by ID and saved again.
//
// Panics if a rescrape fails with anything other than `scraper.ErrDoesntExist`, or if saving the
// rescraped user fails.
func full_save_tweet_trove(trove scraper.TweetTrove) {
|
||||
// The returned slice holds the IDs of users whose handles conflicted with another user
conflicting_users := profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
for _, u_id := range conflicting_users {
|
||||
// Report the conflict on stdout, wrapped in the terminal color codes
fmt.Printf(terminal_utils.COLOR_YELLOW+
|
||||
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping manually"+
|
||||
terminal_utils.COLOR_RESET+"\n",
|
||||
u_id)
|
||||
// Rescrape
|
||||
updated_user, err := scraper.GetUserByID(u_id)
|
||||
if errors.Is(err, scraper.ErrDoesntExist) {
|
||||
// Mark them as deleted.
|
||||
// Handle and display name won't be updated if the user exists.
|
||||
updated_user = scraper.User{ID: u_id, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
||||
} else if err != nil {
|
||||
// Any other scrape error is treated as fatal
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", u_id, err))
|
||||
}
|
||||
// Persist either the freshly scraped user or the placeholder built above
err = profile.SaveUser(&updated_user)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf(
|
||||
"error saving rescraped conflicting user with ID %d and handle %q: %w",
|
||||
updated_user.ID, updated_user.Handle, err,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -410,7 +410,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||
}
|
||||
@ -431,7 +431,7 @@ func fetch_user_feed(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -449,7 +449,7 @@ func get_user_likes(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -467,7 +467,7 @@ func get_followees(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
profile.SaveAsFolloweesList(user.ID, trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err)
|
||||
@ -481,7 +481,7 @@ func get_followers(handle string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
profile.SaveAsFollowersList(user.ID, trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err)
|
||||
@ -491,7 +491,7 @@ func get_bookmarks(how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(fmt.Sprintf(
|
||||
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
|
||||
@ -504,7 +504,7 @@ func fetch_timeline(is_following_only bool) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||
@ -544,7 +544,7 @@ func search(query string, how_many int) {
|
||||
if is_scrape_failure(err) {
|
||||
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
|
||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||
}
|
||||
@ -607,7 +607,7 @@ func fetch_inbox(how_many int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||
}
|
||||
|
||||
@ -621,7 +621,7 @@ func fetch_dm(id string, how_many int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
happy_exit(
|
||||
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
|
||||
err,
|
||||
@ -638,7 +638,7 @@ func send_dm(room_id string, text string, in_reply_to_id int) {
|
||||
if err != nil {
|
||||
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
|
||||
}
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||
}
|
||||
|
||||
@ -670,7 +670,7 @@ func get_notifications(how_many int) {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||
full_save_tweet_trove(trove)
|
||||
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
||||
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
||||
), nil)
|
||||
|
@ -26,8 +26,7 @@ func (app *Application) Bookmarks(w http.ResponseWriter, r *http.Request) {
|
||||
panic(err) // Return a toast
|
||||
}
|
||||
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
|
||||
c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle)
|
||||
|
@ -89,8 +89,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
||||
http.Redirect(w, r, "/", 303)
|
||||
}
|
||||
fmt.Println("Saving initial feed results...")
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
|
||||
// Scrape the user's followers
|
||||
trove, err = app.API.GetFollowees(user.ID, 1000)
|
||||
@ -98,9 +97,8 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
||||
app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error())
|
||||
http.Redirect(w, r, "/", 303)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
|
||||
// Redirect to Timeline
|
||||
http.Redirect(w, r, "/", 303)
|
||||
@ -129,8 +127,7 @@ func (app *Application) ChangeSession(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
// We have to save the notifications first, otherwise it'll just report 0 since the last-read sort index
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
// Set the notifications count
|
||||
app.LastReadNotificationSortIndex = last_unread_notification_sort_index
|
||||
}()
|
||||
|
@ -80,8 +80,7 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
|
||||
func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
||||
@ -153,8 +152,7 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia) // Download the content in the background
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
|
||||
// `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it
|
||||
@ -244,8 +242,7 @@ func (app *Application) Messages(w http.ResponseWriter, r *http.Request) {
|
||||
panic(err)
|
||||
}
|
||||
inbox_cursor = new_cursor
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
|
||||
parts := strings.Split(strings.Trim(r.URL.Path, "/"), "/")
|
||||
|
@ -107,8 +107,7 @@ func (app *Application) Search(w http.ResponseWriter, r *http.Request) {
|
||||
app.ErrorLog.Print(err)
|
||||
// TOOD: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery(search_text)
|
||||
|
@ -53,8 +53,7 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
|
||||
|
||||
// Save the trove unless there was an unrecoverable error
|
||||
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia) // Download the content in the background
|
||||
app.full_save_tweet_trove(trove)
|
||||
_, is_available = trove.Tweets[id]
|
||||
}
|
||||
|
||||
|
@ -59,16 +59,14 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
||||
app.ErrorLog.Print(err)
|
||||
// TOOD: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
} else if len(parts) == 2 && parts[1] == "likes" {
|
||||
trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable
|
||||
if err != nil {
|
||||
app.ErrorLog.Print(err)
|
||||
// TOOD: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,9 +169,8 @@ func (app *Application) UserFollowees(w http.ResponseWriter, r *http.Request, us
|
||||
app.ErrorLog.Print(err)
|
||||
// TOOD: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
}
|
||||
|
||||
data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID))
|
||||
@ -197,9 +194,8 @@ func (app *Application) UserFollowers(w http.ResponseWriter, r *http.Request, us
|
||||
app.ErrorLog.Print(err)
|
||||
// TOOD: show error in UI
|
||||
}
|
||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
app.full_save_tweet_trove(trove)
|
||||
app.Profile.SaveAsFollowersList(user.ID, trove)
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
}
|
||||
|
||||
data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID))
|
||||
|
@ -49,8 +49,7 @@ func (t *BackgroundTask) Do() {
|
||||
// Run the task
|
||||
trove := t.GetTroveFunc(&t.app.API)
|
||||
t.log.Print("saving results")
|
||||
t.app.Profile.SaveTweetTrove(trove, false, t.app.API.DownloadMedia)
|
||||
go t.app.Profile.SaveTweetTrove(trove, true, t.app.API.DownloadMedia)
|
||||
t.app.full_save_tweet_trove(trove)
|
||||
t.log.Print("success")
|
||||
}
|
||||
|
||||
|
38
internal/webserver/temp_utils.go
Normal file
38
internal/webserver/temp_utils.go
Normal file
@ -0,0 +1,38 @@
|
||||
package webserver
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
|
||||
// DUPE: full_save_tweet_trove
|
||||
// full_save_tweet_trove saves `trove` to `app.Profile` in two passes. The first pass runs
// synchronously with the should-download flag set to `false`; every user it reports as having a
// conflicting handle is then rescraped by ID and saved again. The second pass is started in a
// goroutine with the should-download flag set to `true`, and its return value is not inspected.
//
// Panics if a rescrape fails with anything other than `ErrDoesntExist`, or if saving the
// rescraped user fails.
func (app *Application) full_save_tweet_trove(trove TweetTrove) {
|
||||
// Save the initial trove
|
||||
conflicting_users := app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||
|
||||
// Handle conflicting users
|
||||
for _, u_id := range conflicting_users {
|
||||
app.InfoLog.Printf("Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping manually", u_id)
|
||||
// Rescrape
|
||||
updated_user, err := GetUserByID(u_id)
|
||||
if errors.Is(err, ErrDoesntExist) {
|
||||
// Mark them as deleted.
|
||||
// Handle and display name won't be updated if the user exists.
|
||||
updated_user = User{ID: u_id, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
||||
} else if err != nil {
|
||||
// Any other scrape error is treated as fatal
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", u_id, err))
|
||||
}
|
||||
// Persist either the freshly scraped user or the placeholder built above
err = app.Profile.SaveUser(&updated_user)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf(
|
||||
"error saving rescraped conflicting user with ID %d and handle %q: %w",
|
||||
updated_user.ID, updated_user.Handle, err,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
// Download media content in background
|
||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||
}
|
@ -5,37 +5,18 @@ import (
|
||||
"fmt"
|
||||
"path"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
|
||||
// Convenience function that saves all the objects in a TweetTrove.
|
||||
// Panics if anything goes wrong.
|
||||
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download func(string) ([]byte, error)) {
|
||||
// Returns a list of UserIDs that had conflicting handles with another user.
|
||||
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download func(string) ([]byte, error)) (conflict_u_ids []UserID) {
|
||||
for i, u := range trove.Users {
|
||||
err := p.SaveUser(&u)
|
||||
// Check for handle conflicts and handle them in place
|
||||
// TODO: this is hacky, it doesn't go here. We should return a list of conflicting users
|
||||
// who were marked as deleted, and then let the callee re-scrape and re-save them.
|
||||
// Check for user-handle conflicts
|
||||
var conflict_err ErrConflictingUserHandle
|
||||
if errors.As(err, &conflict_err) {
|
||||
log.Warnf(
|
||||
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
|
||||
conflict_err.ConflictingUserID,
|
||||
)
|
||||
user, err := GetUserByID(conflict_err.ConflictingUserID)
|
||||
if errors.Is(err, ErrDoesntExist) {
|
||||
// Mark them as deleted.
|
||||
// Handle and display name won't be updated if the user exists.
|
||||
user = User{ID: conflict_err.ConflictingUserID, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
||||
} else if err != nil {
|
||||
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", conflict_err.ConflictingUserID, err))
|
||||
}
|
||||
err = p.SaveUser(&user)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("error saving rescraped conflicting user with ID %d and handle %q: %w", user.ID, user.Handle, err))
|
||||
}
|
||||
conflict_u_ids = append(conflict_u_ids, conflict_err.ConflictingUserID)
|
||||
} else if err != nil {
|
||||
panic(fmt.Errorf("Error saving user with ID %d and handle %s:\n %w", u.ID, u.Handle, err))
|
||||
}
|
||||
@ -253,4 +234,5 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download
|
||||
}
|
||||
}
|
||||
}
|
||||
return conflict_u_ids // If there are any
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user