SaveTweetTrove now re-fetches users with conflicting handles as well

This commit is contained in:
Alessio 2024-09-23 00:23:07 -07:00
parent 11b03a7927
commit 3539d9418d
4 changed files with 32 additions and 3 deletions

View File

@ -247,6 +247,14 @@ test $(sqlite3 twitter.db "select count(*) from users where handle like '9Monsie
test $(sqlite3 twitter.db "select count(*) from users where handle like '9MonsieurChat9' and not is_deleted") = "1" test $(sqlite3 twitter.db "select count(*) from users where handle like '9MonsieurChat9' and not is_deleted") = "1"
test $(sqlite3 twitter.db "select is_deleted from users where id = 1615394007961731072") = "1" test $(sqlite3 twitter.db "select is_deleted from users where id = 1615394007961731072") = "1"
# Fetch a tweet from such a new account with the same handle as an old one
sqlite3 twitter.db "delete from users where handle like '9MonsieurChat9' and not is_deleted"
test $(sqlite3 twitter.db "select count(*) from users where handle like '9MonsieurChat9'") = "1"
tw fetch_tweet https://x.com/9MonsieurChat9/status/1834121200584589600 # Should update the user as well
test $(sqlite3 twitter.db "select count(*) from users where handle like '9MonsieurChat9'") = "2"
test $(sqlite3 twitter.db "select count(*) from users where handle like '9MonsieurChat9' and not is_deleted") = "1"
test $(sqlite3 twitter.db "select is_deleted from users where id = 1615394007961731072") = "1"
# Test tweets with URLs # Test tweets with URLs
tw fetch_user RoninGreg tw fetch_user RoninGreg

View File

@ -74,7 +74,7 @@ func (app *Application) after_login(w http.ResponseWriter, r *http.Request, api
app.error_404(w, r) app.error_404(w, r)
return return
} }
panic_if(app.Profile.SaveUser(&user)) panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
// Now that the user is scraped for sure, set them as the logged-in user // Now that the user is scraped for sure, set them as the logged-in user

View File

@ -24,7 +24,7 @@ func (app *Application) UserFeed(w http.ResponseWriter, r *http.Request) {
app.error_404(w, r) app.error_404(w, r)
return return
} }
panic_if(app.Profile.SaveUser(&user)) panic_if(app.Profile.SaveUser(&user)) // TODO: handle conflicting users
panic_if(app.Profile.DownloadUserContentFor(&user, &app.API)) panic_if(app.Profile.DownloadUserContentFor(&user, &app.API))
} else if err != nil { } else if err != nil {
panic(err) panic(err)

View File

@ -13,7 +13,28 @@ import (
func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API) { func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API) {
for i, u := range trove.Users { for i, u := range trove.Users {
err := p.SaveUser(&u) err := p.SaveUser(&u)
// Check for handle conflicts and handle them in place
// TODO: this is hacky, it doesn't go here. We should return a list of conflicting users
// who were marked as deleted, and then let the caller re-scrape and re-save them.
var conflict_err ErrConflictingUserHandle
if errors.As(err, &conflict_err) {
fmt.Printf(
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
conflict_err.ConflictingUserID,
)
user, err := GetUserByID(conflict_err.ConflictingUserID)
if errors.Is(err, ErrDoesntExist) {
// Mark them as deleted.
// Handle and display name won't be updated if the user exists.
user = User{ID: conflict_err.ConflictingUserID, DisplayName: "<Unknown User>", Handle: "<UNKNOWN USER>", IsDeleted: true}
} else if err != nil {
panic(fmt.Errorf("error scraping conflicting user (ID %d): %w", conflict_err.ConflictingUserID, err))
}
err = p.SaveUser(&user)
if err != nil { if err != nil {
panic(fmt.Errorf("error saving rescraped conflicting user with ID %d and handle %q: %w", user.ID, user.Handle, err))
}
} else if err != nil {
panic(fmt.Errorf("Error saving user with ID %d and handle %s:\n %w", u.ID, u.Handle, err)) panic(fmt.Errorf("Error saving user with ID %d and handle %s:\n %w", u.ID, u.Handle, err))
} }
fmt.Println(u.Handle, u.ID) fmt.Println(u.Handle, u.ID)