Enable fetching tweets to the limit of scrollback

This commit is contained in:
Alessio 2021-08-22 18:22:06 -07:00
parent 9c944cdc5e
commit 1f5ebf9f8d
4 changed files with 25 additions and 5 deletions

View File

@ -55,7 +55,9 @@ func main() {
case "fetch_tweet":
fetch_full_tweet(target)
case "get_user_tweets":
fetch_user_feed(target)
fetch_user_feed(target, 50)
case "get_user_tweets_all":
fetch_user_feed(target, 999999999)
case "download_tweet_content":
download_tweet_content(target)
case "download_user_content":
@ -174,13 +176,14 @@ func fetch_full_tweet(tweet_url string) {
* args:
* - handle: the user handle to get
* - how_many: the minimum number of tweets to fetch (forwarded to scraper.GetUserFeedFor)
*/
func fetch_user_feed(handle string) {
func fetch_user_feed(handle string, how_many int) {
user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
if err != nil {
die(err.Error(), false, -1)
}
tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, 50);
tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, how_many);
if err != nil {
die("Error scraping feed: " + err.Error(), false, -2)
}

View File

@ -158,7 +158,7 @@ func LoadProfile(profile_dir string) (Profile, error) {
if err != nil {
return Profile{}, err
}
db, err := sql.Open("sqlite3", sqlite_file + "?_foreign_keys=on")
db, err := sql.Open("sqlite3", sqlite_file + "?_foreign_keys=on&_journal_mode=WAL")
if err != nil {
return Profile{}, err
}

View File

@ -11,6 +11,13 @@ import (
const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
// APIError is a string-backed error type. Because it is a plain string
// underneath, values of this type can be declared as constants and compared
// with ==.
type APIError string

// Error implements the error interface by returning the underlying string
// unchanged.
func (apiErr APIError) Error() string {
	message := string(apiErr)
	return message
}

// END_OF_FEED is the sentinel error value reported when no further feed
// content is available (an empty response whose cursor did not advance, or a
// response that is recognized as the end of the feed).
const END_OF_FEED = APIError("End of feed")
type API struct{}
func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
@ -70,6 +77,15 @@ func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, mi
return err
}
if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
// Empty response, cursor same as previous: end of feed has been reached
return END_OF_FEED
}
if fresh_response.IsEndOfFeed() {
// Response has a pinned tweet, but no other content: end of feed has been reached
return END_OF_FEED
}
last_response = &fresh_response
// Copy over the tweets and the users
@ -79,6 +95,7 @@ func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, mi
for id, user := range last_response.GlobalObjects.Users {
response.GlobalObjects.Users[id] = user
}
fmt.Printf("Have %d tweets, and %d users so far\n", len(response.GlobalObjects.Tweets), len(response.GlobalObjects.Users))
}
return nil
}

View File

@ -21,7 +21,7 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (tweets []Tweet, retweets []
if len(tweet_response.GlobalObjects.Tweets) < min_tweets &&
tweet_response.GetCursor() != "" {
err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
if err != nil {
if err != nil && err != END_OF_FEED {
return
}
}