diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 2a74815..80e04e4 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -172,10 +172,11 @@ func fetch_tweet_conversation(tweet_identifier string) { fmt.Println("Tweet is already in database. Updating...") } - tweets, _, users, err := scraper.GetTweetFull(tweet_id) + trove, err := scraper.GetTweetFull(tweet_id) if err != nil { die(err.Error(), false, -1) } + tweets, _, users := trove.Transform() for _, u := range users { fmt.Println(u.Handle) @@ -285,10 +286,11 @@ func download_user_content(handle scraper.UserHandle) { func search(query string) { - tweets, retweets, users, err := scraper.Search(query, 1000); + trove, err := scraper.Search(query, 1000) if err != nil { die("Error scraping search results: " + err.Error(), false, -100) } + tweets, retweets, users := trove.Transform() for _, u := range users { fmt.Println(u.Handle) diff --git a/scraper/search.go b/scraper/search.go index 83ca0f8..012ade1 100644 --- a/scraper/search.go +++ b/scraper/search.go @@ -13,7 +13,7 @@ func TimestampToDateString(timestamp int) string { * - photos * - videos */ -func Search(query string, min_results int) (tweets []Tweet, retweets []Retweet, users []User, err error) { +func Search(query string, min_results int) (trove TweetTrove, err error) { api := API{} tweet_response, err := api.Search(query, "") if err != nil { diff --git a/scraper/tweet.go b/scraper/tweet.go index 3d7a07b..4f1e11d 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -213,7 +213,7 @@ func GetTweet(id TweetID) (Tweet, error) { * * returns: the tweet, list of its replies and context, and users associated with those replies */ -func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User, err error) { +func GetTweetFull(id TweetID) (trove TweetTrove, err error) { api := API{} tweet_response, err := api.GetTweet(id, "") if err != nil { @@ -226,49 +226,31 @@ func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User, return } } - tombstone_users := tweet_response.HandleTombstones() - fmt.Printf("%v\n", tombstone_users) - for _, u := range tombstone_users { - fetched_user, err1 := GetUser(u) - fetched_user.Handle = u - if err != nil { - err = err1 - return - } - fmt.Println(fetched_user) - users = append(users, fetched_user) + + // This has to be called BEFORE ParseTweetResponse, because it modifies the TweetResponse (adds tombstone tweets to its tweets list) + tombstoned_users := tweet_response.HandleTombstones() + + trove, err = ParseTweetResponse(tweet_response) + if err != nil { + panic(err) } - tweets, retweets, _users, err := ParseTweetResponse(tweet_response) + trove.TombstoneUsers = tombstoned_users + trove.FetchTombstoneUsers() // Quoted tombstones need their user_id filled out from the tombstoned_users list - for i := range tweets { - if tweets[i].UserID != 0 { - continue - } - handle := tweet_response.GlobalObjects.Tweets[fmt.Sprint(tweets[i].ID)].UserHandle - is_found := false - for _, u := range users { // The tombstoned users, not from the tweet response - if u.Handle == UserHandle(handle) { - tweets[i].UserID = u.ID - is_found = true - break - } - } - if !is_found { - panic("Couldn't find the user handle in the list of tombstoned users!") - } - } + trove.FillMissingUserIDs() // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" - scrape_time := time.Now() - for i, t := range(tweets) { - if t.ID == id { - // Index the slice because `tweets[i]` is a reference, whereas `t` is a copy - tweets[i].LastScrapedAt = scrape_time - tweets[i].IsConversationScraped = true - } + tweet, ok := trove.Tweets[id] + if !ok { + panic("Trove didn't contain its own tweet!") } - users = append(users, _users...) + tweet.LastScrapedAt = time.Now() + tweet.IsConversationScraped = true + trove.Tweets[id] = tweet + + // tweets, retweets, users = trove.Transform() + return } @@ -280,32 +262,31 @@ func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User, * * returns: a list of tweets, retweets and users in that response object */ -func ParseTweetResponse(resp TweetResponse) (tweets []Tweet, retweets []Retweet, users []User, err error) { - // TODO: TweetCollection maybe should be its own type - var new_tweet Tweet - var new_retweet Retweet +func ParseTweetResponse(resp TweetResponse) (TweetTrove, error) { + trove := NewTweetTrove() + for _, single_tweet := range resp.GlobalObjects.Tweets { if single_tweet.RetweetedStatusIDStr == "" { - new_tweet, err = ParseSingleTweet(single_tweet) + new_tweet, err := ParseSingleTweet(single_tweet) if err != nil { - return + return trove, err } - tweets = append(tweets, new_tweet) + trove.Tweets[new_tweet.ID] = new_tweet } else { - new_retweet, err = ParseSingleRetweet(single_tweet) + new_retweet, err := ParseSingleRetweet(single_tweet) if err != nil { - return + return trove, err } - retweets = append(retweets, new_retweet) + trove.Retweets[new_retweet.RetweetID] = new_retweet } } - var new_user User + for _, user := range resp.GlobalObjects.Users { - new_user, err = ParseSingleUser(user) + new_user, err := ParseSingleUser(user) if err != nil { - return + return trove, err } - users = append(users, new_user) + trove.Users[new_user.ID] = new_user } - return + return trove, nil } diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go index 621abd8..3476160 100644 --- a/scraper/tweet_test.go +++ b/scraper/tweet_test.go @@ -161,8 +161,9 @@ func TestParseTweetResponse(t *testing.T) { err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err) - tweets, retweets, users, err := ParseTweetResponse(tweet_resp) + trove, err := ParseTweetResponse(tweet_resp) require.NoError(t, err) + tweets, retweets, users := trove.Transform() assert.Len(tweets, 29 - 3) assert.Len(retweets, 3) @@ -182,8 +183,9 @@ func TestParseTweetResponseWithTombstones(t *testing.T) { extra_users := tweet_resp.HandleTombstones() assert.Len(extra_users, 1) - tweets, retweets, users, err := ParseTweetResponse(tweet_resp) + trove, err := ParseTweetResponse(tweet_resp) require.NoError(t, err) + tweets, retweets, users := trove.Transform() assert.Len(tweets, 2) assert.Len(retweets, 0) diff --git a/scraper/tweet_trove.go b/scraper/tweet_trove.go index 385b9da..5abc04e 100644 --- a/scraper/tweet_trove.go +++ b/scraper/tweet_trove.go @@ -37,6 +37,17 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [ return } // TODO: refactor until this function isn't needed anymore +/** + * Search for a user by handle. Second param is whether the user was found or not. + */ +func (trove TweetTrove) FindUserByHandle(handle UserHandle) (User, bool) { + for _, user := range trove.Users { + if user.Handle == handle { + return user, true + } + } + return User{}, false +} /** * Combine two troves into one @@ -55,6 +66,30 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) { t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...) } +/** + * Tries to fetch every User that's been identified in a tombstone in this trove + */ +func (trove *TweetTrove) FetchTombstoneUsers() { + for _, handle := range trove.TombstoneUsers { + // Skip fetching if this user is already in the trove + _, already_fetched := trove.FindUserByHandle(handle) + if already_fetched { + continue + } + + user, err := GetUser(handle) + if err != nil { + panic(fmt.Sprintf("Error getting tombstoned user: %s\n %s", handle, err.Error())) + } + + if user.ID == 0 { + panic(fmt.Sprintf("UserID == 0 (@%s)", handle)) + } + + trove.Users[user.ID] = user + } +} + /** * Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users. diff --git a/scraper/tweet_trove_test.go b/scraper/tweet_trove_test.go index 77d8729..de0b549 100644 --- a/scraper/tweet_trove_test.go +++ b/scraper/tweet_trove_test.go @@ -66,3 +66,21 @@ func TestFillMissingUserIDs(t *testing.T) { assert.Equal(trove.Tweets[2].UserID, UserID(1)) } + +func TestFindUserByHandle(t *testing.T) { + assert := assert.New(t) + + u1 := User{ID: 1, Handle: "1", DisplayName: "One"} + u2 := User{ID: 2, Handle: "2", DisplayName: "Two"} + + trove := NewTweetTrove() + trove.Users[u1.ID] = u1 + trove.Users[u2.ID] = u2 + + user_2, ok := trove.FindUserByHandle("2") + assert.True(ok) + assert.Equal(user_2.DisplayName, "Two") + + _, ok = trove.FindUserByHandle("3") + assert.False(ok) +} diff --git a/scraper/user_feed.go b/scraper/user_feed.go index cea79ec..2bafd66 100644 --- a/scraper/user_feed.go +++ b/scraper/user_feed.go @@ -14,7 +14,7 @@ import ( * * returns: a slice of Tweets, Retweets, and Users */ -func GetUserFeedFor(user_id UserID, min_tweets int) (tweets []Tweet, retweets []Retweet, users []User, err error) { +func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { api := API{} tweet_response, err := api.GetFeedFor(user_id, "") if err != nil { @@ -47,33 +47,14 @@ func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, er } } - trove, err = api_response.ToTweetTrove() if err != nil { panic(err) } - // DUPE tombstone-user-processing fmt.Println("------------") - for _, handle := range trove.TombstoneUsers { - fmt.Println(handle) - - user, err := GetUser(handle) - if err != nil { - panic(err) - } - fmt.Println(user) - - if user.ID == 0 { - panic(fmt.Sprintf("UserID == 0 (@%s)", handle)) - } - - trove.Users[user.ID] = user - } - // Quoted tombstones need their user_id filled out from the tombstoned_users list + trove.FetchTombstoneUsers() trove.FillMissingUserIDs() - // <<<<<<< DUPE tombstone-user-processing - return trove, nil }