Don't throw away all previous results if you get rate-limited, just save them
parent b23a6a7e05
commit a1faacaf6b
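The diff below applies one idea throughout: treat rate-limiting and end-of-feed as expected conditions rather than failures, and save whatever was already collected. As a rough orientation before reading the hunks, here is a minimal, self-contained Go sketch of that pattern; the sentinel names mirror scraper.ErrRateLimited and scraper.END_OF_FEED from the diff, while fetchAndSave and the stub types are hypothetical.

package main

import (
	"errors"
	"fmt"
)

// Sentinels standing in for scraper.ErrRateLimited and scraper.END_OF_FEED.
var (
	ErrRateLimited = errors.New("rate limited")
	ErrEndOfFeed   = errors.New("end of feed")
)

// isScrapeFailure mirrors the is_scrape_failure helper added below:
// only errors other than the two sentinels count as real failures.
func isScrapeFailure(err error) bool {
	return err != nil && !errors.Is(err, ErrRateLimited) && !errors.Is(err, ErrEndOfFeed)
}

// fetchAndSave is a hypothetical caller: it keeps partial results
// even when the scrape stopped early due to rate limiting.
func fetchAndSave(scrape func() ([]string, error), save func([]string)) error {
	results, err := scrape()
	if isScrapeFailure(err) {
		return err // genuine failure: bail out without saving
	}
	save(results) // nil, rate-limited, or end-of-feed: save what we have
	return nil
}

func main() {
	_ = fetchAndSave(
		func() ([]string, error) { return []string{"tweet1", "tweet2"}, ErrRateLimited },
		func(r []string) { fmt.Printf("saved %d items despite being rate-limited\n", len(r)) },
	)
}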
@@ -2,6 +2,7 @@ package main
 
 import (
 	_ "embed"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -82,3 +83,11 @@ func get_default_profile() string {
 	}
 	return filepath.Join(app_data_dir, "twitter")
 }
+
+// Returns whether this error should be treated as a failure
+func is_scrape_failure(err error) bool {
+	if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
+		return false
+	}
+	return true
+}
@@ -261,7 +261,7 @@ func create_profile(target_dir string) {
 */
 func fetch_user(handle scraper.UserHandle) {
 	user, err := scraper.GetUser(handle)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(err.Error(), false, -1)
 	}
 	log.Debug(user)
@@ -288,7 +288,7 @@ func fetch_tweet_only(tweet_identifier string) {
 	}
 
 	tweet, err := scraper.GetTweet(tweet_id)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error fetching tweet: %s", err.Error()), false, -1)
 	}
 	log.Debug(tweet)
@@ -313,7 +313,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
 	}
 
 	trove, err := scraper.GetTweetFullAPIV2(tweet_id, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(err.Error(), false, -1)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -329,12 +329,12 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
 */
 func fetch_user_feed(handle string, how_many int) {
 	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
 	}
 
 	trove, err := scraper.GetUserFeedGraphqlFor(user.ID, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -349,7 +349,7 @@ func get_user_likes(handle string, how_many int) {
 	}
 
 	trove, err := scraper.GetUserLikes(user.ID, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -364,7 +364,7 @@ func get_followees(handle string, how_many int) {
 	}
 
 	trove, err := scraper.GetFollowees(user.ID, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -378,7 +378,7 @@ func get_followers(handle string, how_many int) {
 		die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
 	}
 	trove, err := scraper.GetFollowers(user.ID, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -388,7 +388,7 @@ func get_followers(handle string, how_many int) {
 }
 func get_bookmarks(how_many int) {
 	trove, err := scraper.GetBookmarks(how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -400,7 +400,7 @@ func get_bookmarks(how_many int) {
 }
 func fetch_timeline(is_following_only bool) {
 	trove, err := scraper.GetHomeTimeline("", is_following_only)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -437,7 +437,7 @@ func download_user_content(handle scraper.UserHandle) {
 
 func search(query string, how_many int) {
 	trove, err := scraper.Search(query, how_many)
-	if err != nil {
+	if is_scrape_failure(err) {
 		die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
 	}
 	profile.SaveTweetTrove(trove, true)
@@ -506,7 +506,7 @@ func fetch_inbox(how_many int) {
 
 func fetch_dm(id string, how_many int) {
 	room, err := profile.GetChatRoom(scraper.DMChatRoomID(id))
-	if err != nil {
+	if is_scrape_failure(err) {
 		panic(err)
 	}
 	max_id := scraper.DMMessageID(^uint(0) >> 1)
@@ -50,10 +50,10 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
 
 	if is_needing_scrape && !app.IsScrapingDisabled {
 		trove, err := scraper.GetTweetFullAPIV2(id, 50) // TODO: parameterizable
-		if err == nil {
+		if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
 			app.Profile.SaveTweetTrove(trove, false)
 			go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
-			is_available = true
+			_, is_available = trove.Tweets[id]
 		} else {
 			app.ErrorLog.Print(err)
 			// TODO: show error in UI
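One detail in the hunk above: `_, is_available = trove.Tweets[id]` replaces the unconditional `is_available = true`, using Go's comma-ok map lookup so availability reflects whether the requested tweet actually arrived in the (possibly partial) trove. A tiny sketch of the idiom, with hypothetical types:

package main

import "fmt"

type TweetID int

func main() {
	// A partial trove: a rate-limited scrape may have returned only some tweets.
	tweets := map[TweetID]string{42: "hello"}

	// Comma-ok lookup: true only if the key is actually present.
	_, is_available := tweets[TweetID(42)]
	fmt.Println(is_available) // true

	_, is_available = tweets[TweetID(7)]
	fmt.Println(is_available) // false
}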
@@ -895,7 +895,7 @@ func (api *API) GetMore(pq PaginatedQuery, response *APIV2Response, count int) e
 	for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < count {
 		fresh_response, err := pq.NextPage(api, last_response.GetCursorBottom())
 		if err != nil {
-			return fmt.Errorf("error getting next page for %#v: %w", pq, err)
+			return fmt.Errorf("error getting next page for %#v: %w", pq, err) // e.g., rate limited
 		}
 
 		if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() && len(fresh_response.GetMainInstruction().Entries) == 0 {
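The `%w` verb in the wrap above is what keeps this workable: even after `GetMore` wraps a rate-limit error, callers further up can still recognize the sentinel with `errors.Is`. A minimal sketch under assumed names (`nextPage` and `getMore` are stand-ins, not the real API):

package main

import (
	"errors"
	"fmt"
)

var ErrRateLimited = errors.New("rate limited")

func nextPage() error { return ErrRateLimited }

func getMore() error {
	if err := nextPage(); err != nil {
		// %w wraps the original error so errors.Is still matches the sentinel.
		return fmt.Errorf("error getting next page: %w", err)
	}
	return nil
}

func main() {
	err := getMore()
	fmt.Println(errors.Is(err, ErrRateLimited)) // true, because of %w
}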
@@ -925,25 +925,31 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err
 	fmt.Printf("Paginating %d count\n", count)
 	api_response, err := pq.NextPage(api, "")
 	if err != nil {
+		// End of feed on the first call constitutes an empty result, so returning empty is OK
 		return TweetTrove{}, fmt.Errorf("Error calling API to fetch query %#v:\n %w", pq, err)
 	}
 	if len(api_response.GetMainInstruction().Entries) < count && api_response.GetCursorBottom() != "" {
 		err = api.GetMore(pq, &api_response, count)
 		if errors.Is(err, END_OF_FEED) {
-			println("End of feed!")
+			log.Infof("End of feed!")
+		} else if errors.Is(err, ErrRateLimited) {
+			log.Errorf("Rate limited!")
 		} else if err != nil {
 			return TweetTrove{}, err
 		}
 	}
 
-	trove, err := pq.ToTweetTrove(api_response)
-	if err != nil {
-		return TweetTrove{}, fmt.Errorf("Error parsing the tweet trove for query %#v:\n %w", pq, err)
+	trove, err2 := pq.ToTweetTrove(api_response)
+	if err2 != nil {
+		return TweetTrove{}, fmt.Errorf("Error parsing the tweet trove for query %#v:\n %w", pq, err2)
 	}
 
 	fmt.Println("------------")
-	err = trove.PostProcess()
-	return trove, err
+	err2 = trove.PostProcess()
+	if err2 != nil {
+		return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2)
+	}
+	return trove, err // `err` will be either nil, END_OF_FEED, or ErrRateLimited
 }
 
 // Get a User feed using the new GraphQL twitter api
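The `err`/`err2` split above is the core of the change: the sentinel from pagination stays in `err` and is returned alongside the trove, while parsing and post-processing use a second variable so they cannot overwrite it. Callers therefore receive both the partial data and the reason the scrape stopped. A small sketch of that contract, with hypothetical stand-ins for the real functions:

package main

import (
	"errors"
	"fmt"
)

var ErrRateLimited = errors.New("rate limited")

// paginate stands in for the NextPage/GetMore loop; it may return
// partial results together with a sentinel error.
func paginate() ([]string, error) { return []string{"a", "b"}, ErrRateLimited }

func postProcess(items []string) error { return nil }

func run() ([]string, error) {
	items, err := paginate() // err may be nil or a sentinel like ErrRateLimited

	err2 := postProcess(items) // separate variable: don't clobber the sentinel
	if err2 != nil {
		return nil, fmt.Errorf("failed to post-process: %w", err2)
	}
	return items, err // partial results plus the original sentinel (or nil)
}

func main() {
	items, err := run()
	fmt.Println(len(items), errors.Is(err, ErrRateLimited)) // 2 true
}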
@@ -987,7 +993,6 @@ func (api *API) GetGraphqlFeedFor(user_id UserID, cursor string) (APIV2Response,
 
 	var response APIV2Response
 	err = api.do_http(url.String(), cursor, &response)
-
 	return response, err
 }
 
@@ -1107,10 +1112,7 @@ func (api *API) GetUserLikes(user_id UserID, cursor string) (APIV2Response, erro
 
 	var response APIV2Response
 	err = api.do_http(url.String(), cursor, &response)
-	if err != nil {
-		panic(err)
-	}
-	return response, nil
+	return response, err
 }
 
 type PaginatedUserLikes struct {
@@ -1176,10 +1178,7 @@ func (api *API) GetBookmarks(cursor string) (APIV2Response, error) {
 
 	var response APIV2Response
 	err = api.do_http(url.String(), cursor, &response)
-	if err != nil {
-		panic(err)
-	}
-	return response, nil
+	return response, err
 }
 
 type PaginatedBookmarks struct {
@@ -1259,12 +1258,9 @@ func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTro
 		panic(err)
 	}
 	err = api.do_http_POST(url, string(body_bytes), &response)
-	if err != nil {
-		panic(err)
-	}
-	trove, err := response.ToTweetTrove()
-	if err != nil {
-		return TweetTrove{}, err
+	trove, err2 := response.ToTweetTrove()
+	if err2 != nil {
+		return TweetTrove{}, err2
 	}
 	return trove, err
 }
@@ -1312,11 +1308,7 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
 
 	var response UserResponse
 	err = api.do_http(url.String(), "", &response)
-	if err != nil {
-		panic(err)
-	}
-
-	return response.ConvertToAPIUser(), nil
+	return response.ConvertToAPIUser(), err
 }
 
 func (api *API) Search(query string, cursor string) (APIV2Response, error) {
@@ -1373,3 +1365,13 @@ func (p PaginatedSearch) NextPage(api *API, cursor string) (APIV2Response, error
 func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
 	return r.ToTweetTrove()
 }
+
+// TODO: Search modes:
+// - regular ("top")
+// - latest / "live"
+// - search for users
+// - photos
+// - videos
+func Search(query string, min_results int) (trove TweetTrove, err error) {
+	return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
+}
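The relocated `Search` delegates to the same `PaginatedQuery` plumbing the other fetchers use: a small struct provides `NextPage` and `ToTweetTrove`, and `GetPaginatedQuery` drives the loop. A rough, simplified sketch of that shape; the interface and method names follow the diff, but the signatures and stub bodies here are assumptions:

package main

import "fmt"

type TweetTrove struct{ Tweets []string }

type APIV2Response struct {
	Entries []string
	Cursor  string
}

// PaginatedQuery: each query type knows how to fetch one page
// and how to convert a page into a TweetTrove.
type PaginatedQuery interface {
	NextPage(cursor string) (APIV2Response, error)
	ToTweetTrove(r APIV2Response) (TweetTrove, error)
}

type PaginatedSearch struct{ query string }

func (p PaginatedSearch) NextPage(cursor string) (APIV2Response, error) {
	// Stub page; the real implementation calls the search endpoint with `cursor`.
	return APIV2Response{Entries: []string{p.query + " result"}}, nil
}

func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
	return TweetTrove{Tweets: r.Entries}, nil
}

// getPaginatedQuery is a toy stand-in for (*API).GetPaginatedQuery.
func getPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, error) {
	page, err := pq.NextPage("")
	if err != nil {
		return TweetTrove{}, err
	}
	return pq.ToTweetTrove(page)
}

func main() {
	trove, _ := getPaginatedQuery(PaginatedSearch{query: "golang"}, 10)
	fmt.Println(trove.Tweets)
}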
@@ -1,17 +0,0 @@
-package scraper
-
-func TimestampToDateString(timestamp int) string {
-	panic("???") // TODO
-}
-
-/**
- * TODO: Search modes:
- * - regular ("top")
- * - latest / "live"
- * - search for users
- * - photos
- * - videos
- */
-func Search(query string, min_results int) (trove TweetTrove, err error) {
-	return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
-}