REFACTOR: SaveTweetTrove no longer calls scraper.GetUserByID, removing another scraper dependency from the persistence package
- It was calling it to handle conflicting user handles; user handle conflicts are now handled by the caller
- Add a utility function in the `main` and `webserver` packages to rescrape the conflicting users
This commit is contained in:
parent
fc66d1d8a6
commit
4132eb4bca
@ -96,3 +96,30 @@ func is_scrape_failure(err error) bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DUPE: full_save_tweet_trove
|
||||||
|
func full_save_tweet_trove(trove scraper.TweetTrove) {
|
||||||
|
conflicting_users := profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
||||||
|
for _, u_id := range conflicting_users {
|
||||||
|
fmt.Printf(terminal_utils.COLOR_YELLOW+
|
||||||
|
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping manually"+
|
||||||
|
terminal_utils.COLOR_RESET+"\n",
|
||||||
|
u_id)
|
||||||
|
// Rescrape
|
||||||
|
updated_user, err := scraper.GetUserByID(u_id)
|
||||||
|
if errors.Is(err, scraper.ErrDoesntExist) {
|
||||||
|
// Mark them as deleted.
|
||||||
|
// Handle and display name won't be updated if the user exists.
|
||||||
|
updated_user = scraper.User{ID: u_id, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
||||||
|
} else if err != nil {
|
||||||
|
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", u_id, err))
|
||||||
|
}
|
||||||
|
err = profile.SaveUser(&updated_user)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf(
|
||||||
|
"error saving rescraped conflicting user with ID %d and handle %q: %w",
|
||||||
|
updated_user.ID, updated_user.Handle, err,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -410,7 +410,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(err.Error(), false, -1)
|
die(err.Error(), false, -1)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||||
}
|
}
|
||||||
@ -431,7 +431,7 @@ func fetch_user_feed(handle string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(
|
happy_exit(
|
||||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||||
@ -449,7 +449,7 @@ func get_user_likes(handle string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(
|
happy_exit(
|
||||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||||
@ -467,7 +467,7 @@ func get_followees(handle string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
profile.SaveAsFolloweesList(user.ID, trove)
|
profile.SaveAsFolloweesList(user.ID, trove)
|
||||||
|
|
||||||
happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err)
|
happy_exit(fmt.Sprintf("Saved %d followees", len(trove.Users)), err)
|
||||||
@ -481,7 +481,7 @@ func get_followers(handle string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
profile.SaveAsFollowersList(user.ID, trove)
|
profile.SaveAsFollowersList(user.ID, trove)
|
||||||
|
|
||||||
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err)
|
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)), err)
|
||||||
@ -491,7 +491,7 @@ func get_bookmarks(how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(fmt.Sprintf(
|
happy_exit(fmt.Sprintf(
|
||||||
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
|
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
|
||||||
@ -504,7 +504,7 @@ func fetch_timeline(is_following_only bool) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(
|
happy_exit(
|
||||||
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users)),
|
||||||
@ -544,7 +544,7 @@ func search(query string, how_many int) {
|
|||||||
if is_scrape_failure(err) {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
|
|
||||||
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
happy_exit(fmt.Sprintf("Saved %d tweets and %d users", len(trove.Tweets), len(trove.Users)), err)
|
||||||
}
|
}
|
||||||
@ -607,7 +607,7 @@ func fetch_inbox(how_many int) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
|
die(fmt.Sprintf("Failed to fetch inbox:\n %s", err.Error()), false, 1)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -621,7 +621,7 @@ func fetch_dm(id string, how_many int) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
|
die(fmt.Sprintf("Failed to fetch dm:\n %s", err.Error()), false, 1)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
happy_exit(
|
happy_exit(
|
||||||
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
|
fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)),
|
||||||
err,
|
err,
|
||||||
@ -638,7 +638,7 @@ func send_dm(room_id string, text string, in_reply_to_id int) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
|
die(fmt.Sprintf("Failed to send dm:\n %s", err.Error()), false, 1)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
happy_exit(fmt.Sprintf("Saved %d messages from %d chats", len(trove.Messages), len(trove.Rooms)), nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -670,7 +670,7 @@ func get_notifications(how_many int) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
profile.SaveTweetTrove(trove, true, api.DownloadMedia)
|
full_save_tweet_trove(trove)
|
||||||
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
||||||
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
||||||
), nil)
|
), nil)
|
||||||
|
@ -26,8 +26,7 @@ func (app *Application) Bookmarks(w http.ResponseWriter, r *http.Request) {
|
|||||||
panic(err) // Return a toast
|
panic(err) // Return a toast
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle)
|
c := persistence.NewUserFeedBookmarksCursor(app.ActiveUser.Handle)
|
||||||
|
@ -89,8 +89,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
|||||||
http.Redirect(w, r, "/", 303)
|
http.Redirect(w, r, "/", 303)
|
||||||
}
|
}
|
||||||
fmt.Println("Saving initial feed results...")
|
fmt.Println("Saving initial feed results...")
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
|
|
||||||
// Scrape the user's followers
|
// Scrape the user's followers
|
||||||
trove, err = app.API.GetFollowees(user.ID, 1000)
|
trove, err = app.API.GetFollowees(user.ID, 1000)
|
||||||
@ -98,9 +97,8 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
|
|||||||
app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error())
|
app.ErrorLog.Printf("Failed to scrape followers: %s", err.Error())
|
||||||
http.Redirect(w, r, "/", 303)
|
http.Redirect(w, r, "/", 303)
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
|
|
||||||
// Redirect to Timeline
|
// Redirect to Timeline
|
||||||
http.Redirect(w, r, "/", 303)
|
http.Redirect(w, r, "/", 303)
|
||||||
@ -129,8 +127,7 @@ func (app *Application) ChangeSession(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
// We have to save the notifications first, otherwise it'll just report 0 since the last-read sort index
|
// We have to save the notifications first, otherwise it'll just report 0 since the last-read sort index
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
// Set the notifications count
|
// Set the notifications count
|
||||||
app.LastReadNotificationSortIndex = last_unread_notification_sort_index
|
app.LastReadNotificationSortIndex = last_unread_notification_sort_index
|
||||||
}()
|
}()
|
||||||
|
@ -80,8 +80,7 @@ func (app *Application) message_send(w http.ResponseWriter, r *http.Request) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
||||||
@ -153,8 +152,7 @@ func (app *Application) message_detail(w http.ResponseWriter, r *http.Request) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia) // Download the content in the background
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it
|
// `LatestPollingTimestamp` sort of passes-through the function; if we're not updating it, it
|
||||||
@ -244,8 +242,7 @@ func (app *Application) Messages(w http.ResponseWriter, r *http.Request) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
inbox_cursor = new_cursor
|
inbox_cursor = new_cursor
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
parts := strings.Split(strings.Trim(r.URL.Path, "/"), "/")
|
parts := strings.Split(strings.Trim(r.URL.Path, "/"), "/")
|
||||||
|
@ -107,8 +107,7 @@ func (app *Application) Search(w http.ResponseWriter, r *http.Request) {
|
|||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TOOD: show error in UI
|
// TOOD: show error in UI
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c, err := persistence.NewCursorFromSearchQuery(search_text)
|
c, err := persistence.NewCursorFromSearchQuery(search_text)
|
||||||
|
@ -53,8 +53,7 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
|
|||||||
|
|
||||||
// Save the trove unless there was an unrecoverable error
|
// Save the trove unless there was an unrecoverable error
|
||||||
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia) // Download the content in the background
|
|
||||||
_, is_available = trove.Tweets[id]
|
_, is_available = trove.Tweets[id]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,16 +59,14 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
|
|||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TOOD: show error in UI
|
// TOOD: show error in UI
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
} else if len(parts) == 2 && parts[1] == "likes" {
|
} else if len(parts) == 2 && parts[1] == "likes" {
|
||||||
trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable
|
trove, err := app.API.GetUserLikes(user.ID, 50) // TODO: parameterizable
|
||||||
if err != nil {
|
if err != nil {
|
||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TOOD: show error in UI
|
// TOOD: show error in UI
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,9 +169,8 @@ func (app *Application) UserFollowees(w http.ResponseWriter, r *http.Request, us
|
|||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TOOD: show error in UI
|
// TOOD: show error in UI
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
app.Profile.SaveAsFolloweesList(user.ID, trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID))
|
data, trove := NewFollowsData(app.Profile.GetFollowees(user.ID))
|
||||||
@ -197,9 +194,8 @@ func (app *Application) UserFollowers(w http.ResponseWriter, r *http.Request, us
|
|||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TOOD: show error in UI
|
// TOOD: show error in UI
|
||||||
}
|
}
|
||||||
app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
app.full_save_tweet_trove(trove)
|
||||||
app.Profile.SaveAsFollowersList(user.ID, trove)
|
app.Profile.SaveAsFollowersList(user.ID, trove)
|
||||||
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID))
|
data, trove := NewFollowsData(app.Profile.GetFollowers(user.ID))
|
||||||
|
@ -49,8 +49,7 @@ func (t *BackgroundTask) Do() {
|
|||||||
// Run the task
|
// Run the task
|
||||||
trove := t.GetTroveFunc(&t.app.API)
|
trove := t.GetTroveFunc(&t.app.API)
|
||||||
t.log.Print("saving results")
|
t.log.Print("saving results")
|
||||||
t.app.Profile.SaveTweetTrove(trove, false, t.app.API.DownloadMedia)
|
t.app.full_save_tweet_trove(trove)
|
||||||
go t.app.Profile.SaveTweetTrove(trove, true, t.app.API.DownloadMedia)
|
|
||||||
t.log.Print("success")
|
t.log.Print("success")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
38
internal/webserver/temp_utils.go
Normal file
38
internal/webserver/temp_utils.go
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
package webserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DUPE: full_save_tweet_trove
|
||||||
|
func (app *Application) full_save_tweet_trove(trove TweetTrove) {
|
||||||
|
// Save the initial trove
|
||||||
|
conflicting_users := app.Profile.SaveTweetTrove(trove, false, app.API.DownloadMedia)
|
||||||
|
|
||||||
|
// Handle conflicting users
|
||||||
|
for _, u_id := range conflicting_users {
|
||||||
|
app.InfoLog.Printf("Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping manually", u_id)
|
||||||
|
// Rescrape
|
||||||
|
updated_user, err := GetUserByID(u_id)
|
||||||
|
if errors.Is(err, ErrDoesntExist) {
|
||||||
|
// Mark them as deleted.
|
||||||
|
// Handle and display name won't be updated if the user exists.
|
||||||
|
updated_user = User{ID: u_id, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
||||||
|
} else if err != nil {
|
||||||
|
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", u_id, err))
|
||||||
|
}
|
||||||
|
err = app.Profile.SaveUser(&updated_user)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf(
|
||||||
|
"error saving rescraped conflicting user with ID %d and handle %q: %w",
|
||||||
|
updated_user.ID, updated_user.Handle, err,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Download media content in background
|
||||||
|
go app.Profile.SaveTweetTrove(trove, true, app.API.DownloadMedia)
|
||||||
|
}
|
@ -5,37 +5,18 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Convenience function that saves all the objects in a TweetTrove.
|
// Convenience function that saves all the objects in a TweetTrove.
|
||||||
// Panics if anything goes wrong.
|
// Returns a list of UserIDs that had conflicting handles with another user.
|
||||||
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download func(string) ([]byte, error)) {
|
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download func(string) ([]byte, error)) (conflict_u_ids []UserID) {
|
||||||
for i, u := range trove.Users {
|
for i, u := range trove.Users {
|
||||||
err := p.SaveUser(&u)
|
err := p.SaveUser(&u)
|
||||||
// Check for handle conflicts and handle them in place
|
// Check for user-handle conflicts
|
||||||
// TODO: this is hacky, it doesn't go here. We should return a list of conflicting users
|
|
||||||
// who were marked as deleted, and then let the callee re-scrape and re-save them.
|
|
||||||
var conflict_err ErrConflictingUserHandle
|
var conflict_err ErrConflictingUserHandle
|
||||||
if errors.As(err, &conflict_err) {
|
if errors.As(err, &conflict_err) {
|
||||||
log.Warnf(
|
conflict_u_ids = append(conflict_u_ids, conflict_err.ConflictingUserID)
|
||||||
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
|
|
||||||
conflict_err.ConflictingUserID,
|
|
||||||
)
|
|
||||||
user, err := GetUserByID(conflict_err.ConflictingUserID)
|
|
||||||
if errors.Is(err, ErrDoesntExist) {
|
|
||||||
// Mark them as deleted.
|
|
||||||
// Handle and display name won't be updated if the user exists.
|
|
||||||
user = User{ID: conflict_err.ConflictingUserID, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
|
|
||||||
} else if err != nil {
|
|
||||||
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", conflict_err.ConflictingUserID, err))
|
|
||||||
}
|
|
||||||
err = p.SaveUser(&user)
|
|
||||||
if err != nil {
|
|
||||||
panic(fmt.Errorf("error saving rescraped conflicting user with ID %d and handle %q: %w", user.ID, user.Handle, err))
|
|
||||||
}
|
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
panic(fmt.Errorf("Error saving user with ID %d and handle %s:\n %w", u.ID, u.Handle, err))
|
panic(fmt.Errorf("Error saving user with ID %d and handle %s:\n %w", u.ID, u.Handle, err))
|
||||||
}
|
}
|
||||||
@ -253,4 +234,5 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, download
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return conflict_u_ids // If there are any
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user