From 1f5ebf9f8dd044baefbba55ef902acb715e6183c Mon Sep 17 00:00:00 2001
From: Alessio
Date: Sun, 22 Aug 2021 18:22:06 -0700
Subject: [PATCH] Enable fetching tweets to the limit of scrollback

---
 cmd/twitter/main.go          |  9 ++++++---
 persistence/profile.go       |  2 +-
 scraper/api_request_utils.go | 17 +++++++++++++++++
 scraper/user_feed.go         |  2 +-
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index 1e01a24..a8d96b3 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -55,7 +55,9 @@ func main() {
 	case "fetch_tweet":
 		fetch_full_tweet(target)
 	case "get_user_tweets":
-		fetch_user_feed(target)
+		fetch_user_feed(target, 50)
+	case "get_user_tweets_all":
+		fetch_user_feed(target, 999999999)
 	case "download_tweet_content":
 		download_tweet_content(target)
 	case "download_user_content":
@@ -174,13 +176,14 @@ func fetch_full_tweet(tweet_url string) {
  * args:
  * - handle: the user handle to get
  */
-func fetch_user_feed(handle string) {
+func fetch_user_feed(handle string, how_many int) {
 	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
+
 	if err != nil {
 		die(err.Error(), false, -1)
 	}
 
-	tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, 50);
+	tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, how_many);
 	if err != nil {
 		die("Error scraping feed: " + err.Error(), false, -2)
 	}
diff --git a/persistence/profile.go b/persistence/profile.go
index 0b427e5..1ef75ee 100644
--- a/persistence/profile.go
+++ b/persistence/profile.go
@@ -158,7 +158,7 @@ func LoadProfile(profile_dir string) (Profile, error) {
 	if err != nil {
 		return Profile{}, err
 	}
-	db, err := sql.Open("sqlite3", sqlite_file + "?_foreign_keys=on")
+	db, err := sql.Open("sqlite3", sqlite_file + "?_foreign_keys=on&_journal_mode=WAL")
 	if err != nil {
 		return Profile{}, err
 	}
diff --git a/scraper/api_request_utils.go b/scraper/api_request_utils.go
index 20b7238..453bb76 100644
--- a/scraper/api_request_utils.go
+++ b/scraper/api_request_utils.go
@@ -11,6 +11,13 @@ import (
 const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
 const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
 
+type APIError string
+func (e APIError) Error() string {
+	return string(e)
+}
+
+const END_OF_FEED = APIError("End of feed")
+
 type API struct{}
 
 func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
@@ -70,6 +77,15 @@ func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, mi
 			return err
 		}
 
+		if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
+			// Empty response, cursor same as previous: end of feed has been reached
+			return END_OF_FEED
+		}
+		if fresh_response.IsEndOfFeed() {
+			// Response has a pinned tweet, but no other content: end of feed has been reached
+			return END_OF_FEED
+		}
+
 		last_response = &fresh_response
 
 		// Copy over the tweets and the users
@@ -79,6 +95,7 @@ func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, mi
 		for id, user := range last_response.GlobalObjects.Users {
 			response.GlobalObjects.Users[id] = user
 		}
+		fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users))
 	}
 	return nil
 }
diff --git a/scraper/user_feed.go b/scraper/user_feed.go
index f177086..aec24b4 100644
--- a/scraper/user_feed.go
+++ b/scraper/user_feed.go
@@ -21,7 +21,7 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (tweets []Tweet, retweets []
 
 	if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
 		err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
-		if err != nil {
+		if err != nil && err != END_OF_FEED {
 			return
 		}
 	}