Some housekeeping
This commit is contained in:
parent
b78cef34ce
commit
d54e77b169
@ -61,7 +61,7 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error executing SaveTweet(ID %d):\n %w", t.ID, err)
|
return fmt.Errorf("Error executing SaveTweet(ID %d). Info: %#v:\n %w", t.ID, t, err)
|
||||||
}
|
}
|
||||||
for _, url := range t.Urls {
|
for _, url := range t.Urls {
|
||||||
err := p.SaveUrl(url)
|
err := p.SaveUrl(url)
|
||||||
|
@ -18,6 +18,19 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
|||||||
}
|
}
|
||||||
fmt.Println(u.Handle, u.ID)
|
fmt.Println(u.Handle, u.ID)
|
||||||
// If the User's ID was updated in saving (i.e., Unknown User), update it in the Trove too
|
// If the User's ID was updated in saving (i.e., Unknown User), update it in the Trove too
|
||||||
|
// Also update tweets, retweets and spaces that reference this UserID
|
||||||
|
for j, tweet := range trove.Tweets {
|
||||||
|
if tweet.UserID == trove.Users[i].ID {
|
||||||
|
tweet.UserID = u.ID
|
||||||
|
trove.Tweets[j] = tweet
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for j, retweet := range trove.Retweets {
|
||||||
|
if retweet.RetweetedByID == trove.Users[i].ID {
|
||||||
|
retweet.RetweetedByID = u.ID
|
||||||
|
trove.Retweets[j] = retweet
|
||||||
|
}
|
||||||
|
}
|
||||||
trove.Users[i] = u
|
trove.Users[i] = u
|
||||||
|
|
||||||
// Download their tiny profile image
|
// Download their tiny profile image
|
||||||
@ -28,7 +41,9 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now
|
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now
|
||||||
trove.FillMissingUserIDs()
|
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess`
|
||||||
|
// before returning, which includes `FillMissingUserIDs`.
|
||||||
|
// trove.FillMissingUserIDs()
|
||||||
|
|
||||||
for _, t := range trove.Tweets {
|
for _, t := range trove.Tweets {
|
||||||
err := p.SaveTweet(t)
|
err := p.SaveTweet(t)
|
||||||
|
@ -23,7 +23,9 @@ func (p Profile) SaveUser(u *scraper.User) error {
|
|||||||
// We need to continue-- create a new fake user
|
// We need to continue-- create a new fake user
|
||||||
u.ID = p.NextFakeUserID()
|
u.ID = p.NextFakeUserID()
|
||||||
} else if err == nil {
|
} else if err == nil {
|
||||||
// We're done; everything is fine (ID has already been scanned into the User)
|
// We're done; a user exists with this handle already. No need to fake anything, and we have no new data
|
||||||
|
// to provide (since the ID is fake).
|
||||||
|
// ID has already been scanned into the User, for use by the caller.
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
// A real error occurred
|
// A real error occurred
|
||||||
|
@ -220,7 +220,7 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return APIUser{}, fmt.Errorf("Error retrieving API response to GetUser(%s):\n %w", handle, err)
|
return APIUser{}, fmt.Errorf("Error retrieving API response to GetUser(%s):\n %w", handle, err)
|
||||||
}
|
}
|
||||||
log.Debug(string(body))
|
log.Debug("GetUser(" + string(handle) + "): " + string(body))
|
||||||
|
|
||||||
err = json.Unmarshal(body, &response)
|
err = json.Unmarshal(body, &response)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -2,6 +2,7 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TimestampToDateString(timestamp int) string {
|
func TimestampToDateString(timestamp int) string {
|
||||||
@ -32,5 +33,14 @@ func Search(query string, min_results int) (trove TweetTrove, err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ParseTweetResponse(tweet_response)
|
trove, err = ParseTweetResponse(tweet_response)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("Error parsing the tweet trove for search query %q:\n %w", query, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filling tombstones and tombstoned users is probably not necessary here, but we still
|
||||||
|
// need to fetch Spaces
|
||||||
|
err = trove.PostProcess()
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
@ -256,10 +256,13 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
trove.TombstoneUsers = tombstoned_users
|
trove.TombstoneUsers = tombstoned_users
|
||||||
trove.FetchTombstoneUsers()
|
|
||||||
|
|
||||||
// Quoted tombstones need their user_id filled out from the tombstoned_users list
|
// Quoted tombstones need their user_id filled out from the tombstoned_users list
|
||||||
trove.FillMissingUserIDs()
|
err = trove.PostProcess()
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
|
// Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
|
||||||
tweet, ok := trove.Tweets[id]
|
tweet, ok := trove.Tweets[id]
|
||||||
@ -270,8 +273,6 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
|||||||
tweet.IsConversationScraped = true
|
tweet.IsConversationScraped = true
|
||||||
trove.Tweets[id] = tweet
|
trove.Tweets[id] = tweet
|
||||||
|
|
||||||
// tweets, retweets, users = trove.Transform()
|
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,11 +227,10 @@ func TestParseTweetResponse(t *testing.T) {
|
|||||||
|
|
||||||
trove, err := ParseTweetResponse(tweet_resp)
|
trove, err := ParseTweetResponse(tweet_resp)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
tweets, retweets, users := trove.Transform()
|
|
||||||
|
|
||||||
assert.Len(tweets, 29-3)
|
assert.Len(trove.Tweets, 29-3)
|
||||||
assert.Len(retweets, 3)
|
assert.Len(trove.Retweets, 3)
|
||||||
assert.Len(users, 9)
|
assert.Len(trove.Users, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseTweetResponseWithTombstones(t *testing.T) {
|
func TestParseTweetResponseWithTombstones(t *testing.T) {
|
||||||
@ -249,9 +248,8 @@ func TestParseTweetResponseWithTombstones(t *testing.T) {
|
|||||||
|
|
||||||
trove, err := ParseTweetResponse(tweet_resp)
|
trove, err := ParseTweetResponse(tweet_resp)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
tweets, retweets, users := trove.Transform()
|
|
||||||
|
|
||||||
assert.Len(tweets, 2)
|
assert.Len(trove.Tweets, 2)
|
||||||
assert.Len(retweets, 0)
|
assert.Len(trove.Retweets, 0)
|
||||||
assert.Len(users, 1)
|
assert.Len(trove.Users, 1)
|
||||||
}
|
}
|
||||||
|
@ -24,22 +24,6 @@ func NewTweetTrove() TweetTrove {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Make it compatible with previous silly interface if needed
|
|
||||||
*/
|
|
||||||
func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users []User) {
|
|
||||||
for _, val := range trove.Tweets {
|
|
||||||
tweets = append(tweets, val)
|
|
||||||
}
|
|
||||||
for _, val := range trove.Users {
|
|
||||||
users = append(users, val)
|
|
||||||
}
|
|
||||||
for _, val := range trove.Retweets {
|
|
||||||
retweets = append(retweets, val)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
} // TODO: refactor until this function isn't needed anymore
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for a user by handle. Second param is whether the user was found or not.
|
* Search for a user by handle. Second param is whether the user was found or not.
|
||||||
*/
|
*/
|
||||||
@ -107,9 +91,6 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
|
|||||||
*
|
*
|
||||||
* At this point, those users should have been added to this trove's Users collection, and the
|
* At this point, those users should have been added to this trove's Users collection, and the
|
||||||
* Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
|
* Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users.
|
||||||
*
|
|
||||||
* This will still fail if the user deleted their account (instead of getting banned, blocking the
|
|
||||||
* quote-tweeter, etc), because then that user won't show up .
|
|
||||||
*/
|
*/
|
||||||
func (trove *TweetTrove) FillMissingUserIDs() {
|
func (trove *TweetTrove) FillMissingUserIDs() {
|
||||||
for i := range trove.Tweets {
|
for i := range trove.Tweets {
|
||||||
@ -133,3 +114,9 @@ func (trove *TweetTrove) FillMissingUserIDs() {
|
|||||||
trove.Tweets[i] = tweet
|
trove.Tweets[i] = tweet
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (trove *TweetTrove) PostProcess() error {
|
||||||
|
trove.FetchTombstoneUsers()
|
||||||
|
trove.FillMissingUserIDs()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@ -54,8 +54,6 @@ func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("------------")
|
fmt.Println("------------")
|
||||||
trove.FetchTombstoneUsers()
|
err = trove.PostProcess()
|
||||||
trove.FillMissingUserIDs()
|
return trove, err
|
||||||
|
|
||||||
return trove, nil
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user