Some housekeeping
This commit is contained in:
parent
b78cef34ce
commit
d54e77b169
@ -61,7 +61,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error executing SaveTweet(ID %d):\n %w", t.ID, err)
|
||||
return fmt.Errorf("Error executing SaveTweet(ID %d). Info: %#v:\n %w", t.ID, t, err)
|
||||
}
|
||||
for _, url := range t.Urls {
|
||||
err := p.SaveUrl(url)
|
||||
|
@ -18,6 +18,19 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
||||
}
|
||||
fmt.Println(u.Handle, u.ID)
|
||||
// If the User's ID was updated in saving (i.e., Unknown User), update it in the Trove too
|
||||
// Also update tweets, retweets and spaces that reference this UserID
|
||||
for j, tweet := range trove.Tweets {
|
||||
if tweet.UserID == trove.Users[i].ID {
|
||||
tweet.UserID = u.ID
|
||||
trove.Tweets[j] = tweet
|
||||
}
|
||||
}
|
||||
for j, retweet := range trove.Retweets {
|
||||
if retweet.RetweetedByID == trove.Users[i].ID {
|
||||
retweet.RetweetedByID = u.ID
|
||||
trove.Retweets[j] = retweet
|
||||
}
|
||||
}
|
||||
trove.Users[i] = u
|
||||
|
||||
// Download their tiny profile image
|
||||
@ -28,7 +41,9 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
||||
}
|
||||
|
||||
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now
|
||||
trove.FillMissingUserIDs()
|
||||
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess`
|
||||
// before returning, which includes `FillMissingUserIDs`.
|
||||
// trove.FillMissingUserIDs()
|
||||
|
||||
for _, t := range trove.Tweets {
|
||||
err := p.SaveTweet(t)
|
||||
|
@ -23,7 +23,9 @@ func (p Profile) SaveUser(u *scraper.User) error {
|
||||
// We need to continue-- create a new fake user
|
||||
u.ID = p.NextFakeUserID()
|
||||
} else if err == nil {
|
||||
// We're done; everything is fine (ID has already been scanned into the User)
|
||||
// We're done; a user exists with this handle already. No need to fake anything, and we have no new data
|
||||
// to provide (since the ID is fake).
|
||||
// ID has already been scanned into the User, for use by the caller.
|
||||
return nil
|
||||
} else {
|
||||
// A real error occurred
|
||||
|
@ -220,7 +220,7 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
|
||||
if err != nil {
|
||||
return APIUser{}, fmt.Errorf("Error retrieving API response to GetUser(%s):\n %w", handle, err)
|
||||
}
|
||||
log.Debug(string(body))
|
||||
log.Debug("GetUser(" + string(handle) + "): " + string(body))
|
||||
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
|
@ -2,6 +2,7 @@ package scraper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func TimestampToDateString(timestamp int) string {
|
||||
@ -32,5 +33,14 @@ func Search(query string, min_results int) (trove TweetTrove, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
return ParseTweetResponse(tweet_response)
|
||||
trove, err = ParseTweetResponse(tweet_response)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error parsing the tweet trove for search query %q:\n %w", query, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Filling tombstones and tombstoned users is probably not necessary here, but we still
|
||||
// need to fetch Spaces
|
||||
err = trove.PostProcess()
|
||||
return
|
||||
}
|
||||
|
@ -256,10 +256,13 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
||||
panic(err)
|
||||
}
|
||||
trove.TombstoneUsers = tombstoned_users
|
||||
trove.FetchTombstoneUsers()
|
||||
|
||||
// Quoted tombstones need their user_id filled out from the tombstoned_users list
|
||||
trove.FillMissingUserIDs()
|
||||
err = trove.PostProcess()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
|
||||
tweet, ok := trove.Tweets[id]
|
||||
@ -270,8 +273,6 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
||||
tweet.IsConversationScraped = true
|
||||
trove.Tweets[id] = tweet
|
||||
|
||||
// tweets, retweets, users = trove.Transform()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -227,11 +227,10 @@ func TestParseTweetResponse(t *testing.T) {
|
||||
|
||||
trove, err := ParseTweetResponse(tweet_resp)
|
||||
require.NoError(t, err)
|
||||
tweets, retweets, users := trove.Transform()
|
||||
|
||||
assert.Len(tweets, 29-3)
|
||||
assert.Len(retweets, 3)
|
||||
assert.Len(users, 9)
|
||||
assert.Len(trove.Tweets, 29-3)
|
||||
assert.Len(trove.Retweets, 3)
|
||||
assert.Len(trove.Users, 9)
|
||||
}
|
||||
|
||||
func TestParseTweetResponseWithTombstones(t *testing.T) {
|
||||
@ -249,9 +248,8 @@ func TestParseTweetResponseWithTombstones(t *testing.T) {
|
||||
|
||||
trove, err := ParseTweetResponse(tweet_resp)
|
||||
require.NoError(t, err)
|
||||
tweets, retweets, users := trove.Transform()
|
||||
|
||||
assert.Len(tweets, 2)
|
||||
assert.Len(retweets, 0)
|
||||
assert.Len(users, 1)
|
||||
assert.Len(trove.Tweets, 2)
|
||||
assert.Len(trove.Retweets, 0)
|
||||
assert.Len(trove.Users, 1)
|
||||
}
|
||||
|
@ -24,22 +24,6 @@ func NewTweetTrove() TweetTrove {
|
||||
return ret
|
||||
}
|
||||
|
||||
/**
|
||||
* Make it compatible with previous silly interface if needed
|
||||
*/
|
||||
// Transform copies the contents of the trove's Tweets, Retweets and Users
// collections into three separate slices and returns them.
//
// Each result is built only by appending, so a collection with no entries
// yields a nil slice rather than an empty one.
// NOTE(review): if these collections are maps, the element order of the
// returned slices is unspecified and may differ between calls -- confirm
// against the TweetTrove definition before relying on ordering.
func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users []User) {
|
||||
// Collect every tweet value held by the trove.
for _, val := range trove.Tweets {
|
||||
tweets = append(tweets, val)
|
||||
}
|
||||
// Collect every user value held by the trove.
for _, val := range trove.Users {
|
||||
users = append(users, val)
|
||||
}
|
||||
// Collect every retweet value held by the trove.
for _, val := range trove.Retweets {
|
||||
retweets = append(retweets, val)
|
||||
}
|
||||
// Naked return: hands back the named results tweets, retweets and users.
return
|
||||
} // TODO: refactor until this function isn't needed anymore
|
||||
|
||||
/**
|
||||
* Search for a user by handle. Second param is whether the user was found or not.
|
||||
*/
|
||||
@ -107,9 +91,6 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
|
||||
*
|
||||
* At this point, those users should have been added to this trove's Users collection, and the
|
||||
* Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
|
||||
*
|
||||
* This will still fail if the user deleted their account (instead of getting banned, blocking the
|
||||
* quote-tweeter, etc.), because then that user won't show up.
|
||||
*/
|
||||
func (trove *TweetTrove) FillMissingUserIDs() {
|
||||
for i := range trove.Tweets {
|
||||
@ -133,3 +114,9 @@ func (trove *TweetTrove) FillMissingUserIDs() {
|
||||
trove.Tweets[i] = tweet
|
||||
}
|
||||
}
|
||||
|
||||
// PostProcess runs the trove's follow-up steps in a fixed order: it first
// calls FetchTombstoneUsers and then FillMissingUserIDs.
//
// It currently always returns nil; the error result lets callers use a single
// `err = trove.PostProcess()` call site.
// NOTE(review): the order looks significant -- FillMissingUserIDs presumably
// relies on the users added by FetchTombstoneUsers; confirm before reordering.
func (trove *TweetTrove) PostProcess() error {
|
||||
trove.FetchTombstoneUsers()
|
||||
trove.FillMissingUserIDs()
|
||||
// Neither step above reports an error, so there is nothing to propagate.
return nil
|
||||
}
|
||||
|
@ -54,8 +54,6 @@ func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, er
|
||||
}
|
||||
|
||||
fmt.Println("------------")
|
||||
trove.FetchTombstoneUsers()
|
||||
trove.FillMissingUserIDs()
|
||||
|
||||
return trove, nil
|
||||
err = trove.PostProcess()
|
||||
return trove, err
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user