From 1e1e97b5ca8be3e13ec0bad90b8ddeaab0e99634 Mon Sep 17 00:00:00 2001 From: Alessio Date: Wed, 2 Feb 2022 14:43:17 -0800 Subject: [PATCH] Add TweetTrove#FillMissingUserIDs helper method --- scraper/tweet_trove.go | 42 ++++++++++++++++++++++++++++++++++++- scraper/tweet_trove_test.go | 19 +++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/scraper/tweet_trove.go b/scraper/tweet_trove.go index 298e688..385b9da 100644 --- a/scraper/tweet_trove.go +++ b/scraper/tweet_trove.go @@ -1,5 +1,9 @@ package scraper +import ( + "fmt" +) + type TweetTrove struct { Tweets map[TweetID]Tweet Users map[UserID]User @@ -30,7 +34,6 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [ for _, val := range trove.Retweets { retweets = append(retweets, val) } - return } // TODO: refactor until this function isn't needed anymore @@ -51,3 +54,40 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) { t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...) } + +/** + * Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users. + + * To be called after calling "scraper.GetUser" on all the tombstoned users. + * + * At this point, those users should have been added to this trove's Users collection, and the + * Tweets have a field `UserHandle` which can be used to pair them with newly fetched Users. + * + * This will still fail if the user deleted their account (instead of getting banned, blocking the + * quote-tweeter, etc), because then that user won't show up . + */ +func (trove *TweetTrove) FillMissingUserIDs() { + for i := range trove.Tweets { + tweet := trove.Tweets[i] + if tweet.UserID != 0 { + // No need to fill this tweet's user_id, it's already filled + continue + } + + handle := tweet.UserHandle + is_user_found := false + for _, u := range trove.Users { + if u.Handle == handle { + tweet.UserID = u.ID + is_user_found = true + break + } + } + if !is_user_found { + // The user probably deleted deleted their account, and thus `scraper.GetUser` failed. So + // they're not in this trove's Users. + panic(fmt.Sprintf("Couldn't fill out this Tweet's UserID: %d, %s", tweet.ID, tweet.UserHandle)) + } + trove.Tweets[i] = tweet + } +} diff --git a/scraper/tweet_trove_test.go b/scraper/tweet_trove_test.go index 67e667c..77d8729 100644 --- a/scraper/tweet_trove_test.go +++ b/scraper/tweet_trove_test.go @@ -47,3 +47,22 @@ func TestMergeTweetTroves(t *testing.T) { assert.Equal(3, len(trove1.Retweets)) assert.Equal(3, len(trove1.TombstoneUsers)) } + +func TestFillMissingUserIDs(t *testing.T) { + assert := assert.New(t) + u1 := User{ID: 1, Handle: "a"} + + t1 := Tweet{ID: 1, UserID: 1} + t2 := Tweet{ID: 2, UserHandle: "a"} + + trove := NewTweetTrove() + trove.Users[u1.ID] = u1 + trove.Tweets[t1.ID] = t1 + trove.Tweets[t2.ID] = t2 + + assert.NotEqual(trove.Tweets[2].UserID, UserID(1)) + + trove.FillMissingUserIDs() + + assert.Equal(trove.Tweets[2].UserID, UserID(1)) +}