From 4a3c12b04a10eb6d62c4a4c40446a64c94fc22fb Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 16 Aug 2021 20:35:15 -0700 Subject: [PATCH] Update some docstrings, rename a few methods --- cmd/fetch_user_feed_to_stdout/main.go | 2 +- scraper/api_request_utils.go | 15 +++++++++++--- scraper/tweet.go | 28 ++++++++++++++++++++++++--- scraper/user_feed.go | 18 ++++++++++++----- 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/cmd/fetch_user_feed_to_stdout/main.go b/cmd/fetch_user_feed_to_stdout/main.go index 5909b86..43aebc3 100644 --- a/cmd/fetch_user_feed_to_stdout/main.go +++ b/cmd/fetch_user_feed_to_stdout/main.go @@ -19,7 +19,7 @@ func main() { log.Fatal("Error getting user profile: " + err.Error()) } - tweets, retweets, users, err := scraper.GetFeedFull(user.ID, 1) + tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, 1) if err != nil { log.Fatal("Error getting user feed: " + err.Error()) } diff --git a/scraper/api_request_utils.go b/scraper/api_request_utils.go index c91cd07..70b75a9 100644 --- a/scraper/api_request_utils.go +++ b/scraper/api_request_utils.go @@ -52,10 +52,19 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) return response, err } -// Resend the request to get more tweets if necessary -func (api API) GetMoreTweets(user_id UserID, response *TweetResponse, max_tweets int) error { +/** + * Resend the request to get more tweets if necessary + * + * args: + * - user_id: the user's UserID + * - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to + * - min_tweets: the desired minimum amount of tweets to get + */ +func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error { + // TODO user-feed-infinite-fetch: what if you reach the end of the user's timeline? 
Might loop + // forever getting no new tweets last_response := response - for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_tweets { + for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets { fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor()) if err != nil { return err diff --git a/scraper/tweet.go b/scraper/tweet.go index a3c20e8..2a0ec04 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -125,7 +125,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { } -// Return a single tweet, nothing else +/** + * Get a single tweet with no replies from the API. + * + * args: + * - id: the ID of the tweet to get + * + * returns: the single Tweet + */ func GetTweet(id TweetID) (Tweet, error) { api := API{} tweet_response, err := api.GetTweet(id, "") @@ -143,8 +150,15 @@ func GetTweet(id TweetID) (Tweet, error) { } -// Return a list of tweets, including the original and the rest of its thread, -// along with a list of associated users +/** + * Return a list of tweets, including the original and the rest of its thread, + * along with a list of associated users. 
+ * + * args: + * - id: the ID of the tweet to get + * + * returns: the tweet, list of its replies and context, and users associated with those replies + */ func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User, err error) { api := API{} tweet_response, err := api.GetTweet(id, "") @@ -162,6 +176,14 @@ func GetTweetFull(id TweetID) (tweets []Tweet, retweets []Retweet, users []User, return ParseTweetResponse(tweet_response) } +/** + * Parse an API response object into a list of tweets, retweets and users + * + * args: + * - resp: the response from the API + * + * returns: a list of tweets, retweets and users in that response object + */ func ParseTweetResponse(resp TweetResponse) (tweets []Tweet, retweets []Retweet, users []User, err error) { var new_tweet Tweet var new_retweet Retweet diff --git a/scraper/user_feed.go b/scraper/user_feed.go index d323fcc..f177086 100644 --- a/scraper/user_feed.go +++ b/scraper/user_feed.go @@ -1,18 +1,26 @@ package scraper -// Return a list of tweets, including the original and the rest of its thread, -// along with a list of associated users -func GetFeedFull(user_id UserID, max_tweets int) (tweets []Tweet, retweets []Retweet, users []User, err error) { +/** + * Get a list of tweets that appear on the given user's page, along with a list of associated + * users for any retweets. 
+ * + * args: + * - user_id: the ID of the user whose feed to fetch + * - min_tweets: get at least this many tweets, if there are any + * + * returns: a slice of Tweets, Retweets, and Users + */ +func GetUserFeedFor(user_id UserID, min_tweets int) (tweets []Tweet, retweets []Retweet, users []User, err error) { api := API{} tweet_response, err := api.GetFeedFor(user_id, "") if err != nil { return } - if len(tweet_response.GlobalObjects.Tweets) < max_tweets && + if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" { - err = api.GetMoreTweets(user_id, &tweet_response, max_tweets) + err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) if err != nil { return }