REFACTOR: convert TweetTrove methods that make API calls into API methods that take a TweetTrove instead
This commit is contained in:
parent
368209a1cc
commit
e0ce1f205a
85
pkg/scraper/api_tweet_trove_utils.go
Normal file
85
pkg/scraper/api_tweet_trove_utils.go
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (api *API) FillSpaceDetails(trove *TweetTrove) error {
|
||||||
|
fmt.Println("Filling space details")
|
||||||
|
for i := range trove.Spaces {
|
||||||
|
fmt.Printf("Getting space: %q\n", trove.Spaces[i].ID)
|
||||||
|
new_trove, err := api.FetchSpaceDetail(trove.Spaces[i].ID)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Replace the old space in the trove with the new, updated one
|
||||||
|
new_space, is_ok := new_trove.Spaces[i]
|
||||||
|
if new_space.ShortUrl == "" {
|
||||||
|
// Copy over the short-url, which doesn't seem to exist on a full Space response
|
||||||
|
new_space.ShortUrl = trove.Spaces[i].ShortUrl
|
||||||
|
}
|
||||||
|
if is_ok {
|
||||||
|
// Necessary to check is_ok because the space response could be empty, in which case
|
||||||
|
// we don't want to overwrite it
|
||||||
|
trove.Spaces[i] = new_space
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *API) PostProcess(trove *TweetTrove) error {
|
||||||
|
api.FetchTombstoneUsers(trove)
|
||||||
|
trove.FillMissingUserIDs()
|
||||||
|
err := api.FillSpaceDetails(trove)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tries to fetch every User that's been identified in a tombstone in this trove
|
||||||
|
func (api *API) FetchTombstoneUsers(trove *TweetTrove) {
|
||||||
|
for _, handle := range trove.TombstoneUsers {
|
||||||
|
// Skip fetching if this user is already in the trove
|
||||||
|
user, already_fetched := trove.FindUserByHandle(handle)
|
||||||
|
|
||||||
|
if already_fetched {
|
||||||
|
// If the user is already fetched and it's an intact user, don't fetch it again
|
||||||
|
if user.JoinDate.Unix() != (Timestamp{}).Unix() && user.JoinDate.Unix() != 0 {
|
||||||
|
log.Debugf("Skipping %q due to intact user", handle)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// A user needs a valid handle or ID to fetch it by
|
||||||
|
if user.IsIdFake && user.Handle == "<UNKNOWN USER>" {
|
||||||
|
log.Debugf("Skipping %q due to completely unknown user (not fetchable)", handle)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug("Getting tombstone user: " + handle)
|
||||||
|
user, err := api.GetUser(handle)
|
||||||
|
if errors.Is(err, ErrDoesntExist) {
|
||||||
|
user = GetUnknownUserWithHandle(handle)
|
||||||
|
user.IsDeleted = true
|
||||||
|
} else if err != nil {
|
||||||
|
panic(fmt.Errorf("Error getting tombstoned user with handle %q: \n %w", handle, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
if user.ID == 0 {
|
||||||
|
// Find some random ID to fit it into the trove
|
||||||
|
for i := 1; ; i++ {
|
||||||
|
_, ok := trove.Users[UserID(i)]
|
||||||
|
if !ok {
|
||||||
|
user.ID = UserID(i)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trove.Users[user.ID] = user
|
||||||
|
}
|
||||||
|
}
|
@ -964,7 +964,7 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err
|
|||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("------------")
|
fmt.Println("------------")
|
||||||
err2 = trove.PostProcess(api)
|
err2 = api.PostProcess(&trove)
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2)
|
return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2)
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,8 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type TweetTrove struct {
|
type TweetTrove struct {
|
||||||
@ -87,59 +84,11 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users.
|
||||||
* Tries to fetch every User that's been identified in a tombstone in this trove
|
// To be called after calling "scraper.GetUser" on all the tombstoned users.
|
||||||
*/
|
//
|
||||||
func (trove *TweetTrove) FetchTombstoneUsers(api *API) {
|
// At this point, those users should have been added to this trove's Users collection, and the
|
||||||
for _, handle := range trove.TombstoneUsers {
|
// Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
|
||||||
// Skip fetching if this user is already in the trove
|
|
||||||
user, already_fetched := trove.FindUserByHandle(handle)
|
|
||||||
|
|
||||||
if already_fetched {
|
|
||||||
// If the user is already fetched and it's an intact user, don't fetch it again
|
|
||||||
if user.JoinDate.Unix() != (Timestamp{}).Unix() && user.JoinDate.Unix() != 0 {
|
|
||||||
log.Debugf("Skipping %q due to intact user", handle)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// A user needs a valid handle or ID to fetch it by
|
|
||||||
if user.IsIdFake && user.Handle == "<UNKNOWN USER>" {
|
|
||||||
log.Debugf("Skipping %q due to completely unknown user (not fetchable)", handle)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debug("Getting tombstone user: " + handle)
|
|
||||||
user, err := api.GetUser(handle)
|
|
||||||
if errors.Is(err, ErrDoesntExist) {
|
|
||||||
user = GetUnknownUserWithHandle(handle)
|
|
||||||
user.IsDeleted = true
|
|
||||||
} else if err != nil {
|
|
||||||
panic(fmt.Errorf("Error getting tombstoned user with handle %q: \n %w", handle, err))
|
|
||||||
}
|
|
||||||
|
|
||||||
if user.ID == 0 {
|
|
||||||
// Find some random ID to fit it into the trove
|
|
||||||
for i := 1; ; i++ {
|
|
||||||
_, ok := trove.Users[UserID(i)]
|
|
||||||
if !ok {
|
|
||||||
user.ID = UserID(i)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
trove.Users[user.ID] = user
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users.
|
|
||||||
* To be called after calling "scraper.GetUser" on all the tombstoned users.
|
|
||||||
*
|
|
||||||
* At this point, those users should have been added to this trove's Users collection, and the
|
|
||||||
* Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
|
|
||||||
*/
|
|
||||||
func (trove *TweetTrove) FillMissingUserIDs() {
|
func (trove *TweetTrove) FillMissingUserIDs() {
|
||||||
for i := range trove.Tweets {
|
for i := range trove.Tweets {
|
||||||
tweet := trove.Tweets[i]
|
tweet := trove.Tweets[i]
|
||||||
@ -163,39 +112,6 @@ func (trove *TweetTrove) FillMissingUserIDs() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (trove *TweetTrove) FillSpaceDetails(api *API) error {
|
|
||||||
fmt.Println("Filling space details")
|
|
||||||
for i := range trove.Spaces {
|
|
||||||
fmt.Printf("Getting space: %q\n", trove.Spaces[i].ID)
|
|
||||||
new_trove, err := api.FetchSpaceDetail(trove.Spaces[i].ID)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// Replace the old space in the trove with the new, updated one
|
|
||||||
new_space, is_ok := new_trove.Spaces[i]
|
|
||||||
if new_space.ShortUrl == "" {
|
|
||||||
// Copy over the short-url, which doesn't seem to exist on a full Space response
|
|
||||||
new_space.ShortUrl = trove.Spaces[i].ShortUrl
|
|
||||||
}
|
|
||||||
if is_ok {
|
|
||||||
// Necessary to check is_ok because the space response could be empty, in which case
|
|
||||||
// we don't want to overwrite it
|
|
||||||
trove.Spaces[i] = new_space
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (trove *TweetTrove) PostProcess(api *API) error {
|
|
||||||
trove.FetchTombstoneUsers(api)
|
|
||||||
trove.FillMissingUserIDs()
|
|
||||||
err := trove.FillSpaceDetails(api)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
|
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
|
||||||
oldest := DMMessageID(^uint(0) >> 1) // Max integer
|
oldest := DMMessageID(^uint(0) >> 1) // Max integer
|
||||||
for _, m := range t.Messages {
|
for _, m := range t.Messages {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user