REFACTOR: convert TweetTrove methods that make API calls into API methods that take a TweetTrove

This commit is contained in:
Alessio 2025-02-03 08:03:13 -08:00
parent 368209a1cc
commit e0ce1f205a
3 changed files with 91 additions and 90 deletions

View File

@ -0,0 +1,85 @@
package scraper
import (
"errors"
"fmt"
log "github.com/sirupsen/logrus"
)
// FillSpaceDetails re-fetches every Space in the trove via `api.FetchSpaceDetail` and replaces
// the trove's entry with the freshly fetched one.
//
// If a fetch fails, that error is returned immediately and the remaining Spaces are not
// processed; entries that were already replaced keep their new values.
func (api *API) FillSpaceDetails(trove *TweetTrove) error {
	fmt.Println("Filling space details")
	for space_id, old_space := range trove.Spaces {
		fmt.Printf("Getting space: %q\n", old_space.ID)
		fetched, err := api.FetchSpaceDetail(old_space.ID)
		if err != nil {
			return err
		}

		detailed, was_found := fetched.Spaces[space_id]
		if !was_found {
			// The space response could be empty, in which case the existing entry must not
			// be overwritten
			continue
		}
		if detailed.ShortUrl == "" {
			// The short-url doesn't seem to exist on a full Space response, so carry over
			// the one that's already in the trove
			detailed.ShortUrl = old_space.ShortUrl
		}
		// Swap the old space out for the new, updated one
		trove.Spaces[space_id] = detailed
	}
	return nil
}
// PostProcess runs the follow-up steps on a freshly scraped trove, in this order:
//
//  1. FetchTombstoneUsers (returns no error; it panics on an unexpected fetch failure)
//  2. trove.FillMissingUserIDs
//  3. FillSpaceDetails
//
// The returned error is whatever FillSpaceDetails returns.
func (api *API) PostProcess(trove *TweetTrove) error {
	api.FetchTombstoneUsers(trove)
	trove.FillMissingUserIDs()
	return api.FillSpaceDetails(trove)
}
// FetchTombstoneUsers tries to fetch every User that's been identified in a tombstone in this
// trove (i.e., every handle in `trove.TombstoneUsers`) and stores the result in `trove.Users`.
//
// A handle is skipped if the trove already holds a user for it that is either intact (has a
// non-zero join date) or completely unknown (fake ID and the "<UNKNOWN USER>" handle).
//
// If the API reports `ErrDoesntExist`, a placeholder user marked as deleted is stored instead.
// Any other fetch error causes a panic.
func (api *API) FetchTombstoneUsers(trove *TweetTrove) {
	for _, handle := range trove.TombstoneUsers {
		// Check whether the trove already has something usable for this handle
		if existing, is_present := trove.FindUserByHandle(handle); is_present {
			joined_at := existing.JoinDate.Unix()
			if joined_at != (Timestamp{}).Unix() && joined_at != 0 {
				// Already fetched, and it's an intact user; no need to fetch it again
				log.Debugf("Skipping %q due to intact user", handle)
				continue
			}
			if existing.IsIdFake && existing.Handle == "<UNKNOWN USER>" {
				// Without a valid handle or ID, there's nothing to fetch the user by
				log.Debugf("Skipping %q due to completely unknown user (not fetchable)", handle)
				continue
			}
		}

		log.Debug("Getting tombstone user: " + handle)
		user, err := api.GetUser(handle)
		switch {
		case errors.Is(err, ErrDoesntExist):
			user = GetUnknownUserWithHandle(handle)
			user.IsDeleted = true
		case err != nil:
			panic(fmt.Errorf("Error getting tombstoned user with handle %q: \n %w", handle, err))
		}

		if user.ID == 0 {
			// The user has no ID; give it the lowest positive one that isn't taken yet, so
			// that it fits into the trove without clobbering another user
			for n := 1; ; n++ {
				candidate := UserID(n)
				if _, is_taken := trove.Users[candidate]; !is_taken {
					user.ID = candidate
					break
				}
			}
		}
		trove.Users[user.ID] = user
	}
}

View File

@ -964,7 +964,7 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err
} }
fmt.Println("------------") fmt.Println("------------")
err2 = trove.PostProcess(api) err2 = api.PostProcess(&trove)
if err2 != nil { if err2 != nil {
return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2) return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2)
} }

View File

@ -1,11 +1,8 @@
package scraper package scraper
import ( import (
"errors"
"fmt" "fmt"
"strings" "strings"
log "github.com/sirupsen/logrus"
) )
type TweetTrove struct { type TweetTrove struct {
@ -87,59 +84,11 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
} }
} }
/** // Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users.
* Tries to fetch every User that's been identified in a tombstone in this trove // To be called after calling "scraper.GetUser" on all the tombstoned users.
*/ //
func (trove *TweetTrove) FetchTombstoneUsers(api *API) { // At this point, those users should have been added to this trove's Users collection, and the
for _, handle := range trove.TombstoneUsers { // Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
// Skip fetching if this user is already in the trove
user, already_fetched := trove.FindUserByHandle(handle)
if already_fetched {
// If the user is already fetched and it's an intact user, don't fetch it again
if user.JoinDate.Unix() != (Timestamp{}).Unix() && user.JoinDate.Unix() != 0 {
log.Debugf("Skipping %q due to intact user", handle)
continue
}
// A user needs a valid handle or ID to fetch it by
if user.IsIdFake && user.Handle == "<UNKNOWN USER>" {
log.Debugf("Skipping %q due to completely unknown user (not fetchable)", handle)
continue
}
}
log.Debug("Getting tombstone user: " + handle)
user, err := api.GetUser(handle)
if errors.Is(err, ErrDoesntExist) {
user = GetUnknownUserWithHandle(handle)
user.IsDeleted = true
} else if err != nil {
panic(fmt.Errorf("Error getting tombstoned user with handle %q: \n %w", handle, err))
}
if user.ID == 0 {
// Find some random ID to fit it into the trove
for i := 1; ; i++ {
_, ok := trove.Users[UserID(i)]
if !ok {
user.ID = UserID(i)
break
}
}
}
trove.Users[user.ID] = user
}
}
/**
* Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users.
* To be called after calling "scraper.GetUser" on all the tombstoned users.
*
* At this point, those users should have been added to this trove's Users collection, and the
* Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
*/
func (trove *TweetTrove) FillMissingUserIDs() { func (trove *TweetTrove) FillMissingUserIDs() {
for i := range trove.Tweets { for i := range trove.Tweets {
tweet := trove.Tweets[i] tweet := trove.Tweets[i]
@ -163,39 +112,6 @@ func (trove *TweetTrove) FillMissingUserIDs() {
} }
} }
func (trove *TweetTrove) FillSpaceDetails(api *API) error {
fmt.Println("Filling space details")
for i := range trove.Spaces {
fmt.Printf("Getting space: %q\n", trove.Spaces[i].ID)
new_trove, err := api.FetchSpaceDetail(trove.Spaces[i].ID)
if err != nil {
return err
}
// Replace the old space in the trove with the new, updated one
new_space, is_ok := new_trove.Spaces[i]
if new_space.ShortUrl == "" {
// Copy over the short-url, which doesn't seem to exist on a full Space response
new_space.ShortUrl = trove.Spaces[i].ShortUrl
}
if is_ok {
// Necessary to check is_ok because the space response could be empty, in which case
// we don't want to overwrite it
trove.Spaces[i] = new_space
}
}
return nil
}
func (trove *TweetTrove) PostProcess(api *API) error {
trove.FetchTombstoneUsers(api)
trove.FillMissingUserIDs()
err := trove.FillSpaceDetails(api)
if err != nil {
return err
}
return nil
}
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID { func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
oldest := DMMessageID(^uint(0) >> 1) // Max integer oldest := DMMessageID(^uint(0) >> 1) // Max integer
for _, m := range t.Messages { for _, m := range t.Messages {