Don't throw away all previous results if you get rate-limited; just save them
This commit is contained in:
parent
b23a6a7e05
commit
a1faacaf6b
@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
_ "embed"
|
_ "embed"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -82,3 +83,11 @@ func get_default_profile() string {
|
|||||||
}
|
}
|
||||||
return filepath.Join(app_data_dir, "twitter")
|
return filepath.Join(app_data_dir, "twitter")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns whether this error should be treated as a failure. A nil error, END_OF_FEED, and ErrRateLimited are not failures.
|
||||||
|
func is_scrape_failure(err error) bool {
|
||||||
|
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
@ -261,7 +261,7 @@ func create_profile(target_dir string) {
|
|||||||
*/
|
*/
|
||||||
func fetch_user(handle scraper.UserHandle) {
|
func fetch_user(handle scraper.UserHandle) {
|
||||||
user, err := scraper.GetUser(handle)
|
user, err := scraper.GetUser(handle)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(err.Error(), false, -1)
|
die(err.Error(), false, -1)
|
||||||
}
|
}
|
||||||
log.Debug(user)
|
log.Debug(user)
|
||||||
@ -288,7 +288,7 @@ func fetch_tweet_only(tweet_identifier string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
tweet, err := scraper.GetTweet(tweet_id)
|
tweet, err := scraper.GetTweet(tweet_id)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error fetching tweet: %s", err.Error()), false, -1)
|
die(fmt.Sprintf("Error fetching tweet: %s", err.Error()), false, -1)
|
||||||
}
|
}
|
||||||
log.Debug(tweet)
|
log.Debug(tweet)
|
||||||
@ -313,7 +313,7 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
trove, err := scraper.GetTweetFullAPIV2(tweet_id, how_many)
|
trove, err := scraper.GetTweetFullAPIV2(tweet_id, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(err.Error(), false, -1)
|
die(err.Error(), false, -1)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -329,12 +329,12 @@ func fetch_tweet_conversation(tweet_identifier string, how_many int) {
|
|||||||
*/
|
*/
|
||||||
func fetch_user_feed(handle string, how_many int) {
|
func fetch_user_feed(handle string, how_many int) {
|
||||||
user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
|
user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
|
die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
trove, err := scraper.GetUserFeedGraphqlFor(user.ID, how_many)
|
trove, err := scraper.GetUserFeedGraphqlFor(user.ID, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -349,7 +349,7 @@ func get_user_likes(handle string, how_many int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
trove, err := scraper.GetUserLikes(user.ID, how_many)
|
trove, err := scraper.GetUserLikes(user.ID, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping feed: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -364,7 +364,7 @@ func get_followees(handle string, how_many int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
trove, err := scraper.GetFollowees(user.ID, how_many)
|
trove, err := scraper.GetFollowees(user.ID, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -378,7 +378,7 @@ func get_followers(handle string, how_many int) {
|
|||||||
die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
|
die(fmt.Sprintf("Error getting user: %s\n %s", handle, err.Error()), false, -1)
|
||||||
}
|
}
|
||||||
trove, err := scraper.GetFollowers(user.ID, how_many)
|
trove, err := scraper.GetFollowers(user.ID, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
die(fmt.Sprintf("Error getting followees: %s\n %s", handle, err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -388,7 +388,7 @@ func get_followers(handle string, how_many int) {
|
|||||||
}
|
}
|
||||||
func get_bookmarks(how_many int) {
|
func get_bookmarks(how_many int) {
|
||||||
trove, err := scraper.GetBookmarks(how_many)
|
trove, err := scraper.GetBookmarks(how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -400,7 +400,7 @@ func get_bookmarks(how_many int) {
|
|||||||
}
|
}
|
||||||
func fetch_timeline(is_following_only bool) {
|
func fetch_timeline(is_following_only bool) {
|
||||||
trove, err := scraper.GetHomeTimeline("", is_following_only)
|
trove, err := scraper.GetHomeTimeline("", is_following_only)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
die(fmt.Sprintf("Error fetching timeline:\n %s", err.Error()), false, -2)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -437,7 +437,7 @@ func download_user_content(handle scraper.UserHandle) {
|
|||||||
|
|
||||||
func search(query string, how_many int) {
|
func search(query string, how_many int) {
|
||||||
trove, err := scraper.Search(query, how_many)
|
trove, err := scraper.Search(query, how_many)
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100)
|
||||||
}
|
}
|
||||||
profile.SaveTweetTrove(trove, true)
|
profile.SaveTweetTrove(trove, true)
|
||||||
@ -506,7 +506,7 @@ func fetch_inbox(how_many int) {
|
|||||||
|
|
||||||
func fetch_dm(id string, how_many int) {
|
func fetch_dm(id string, how_many int) {
|
||||||
room, err := profile.GetChatRoom(scraper.DMChatRoomID(id))
|
room, err := profile.GetChatRoom(scraper.DMChatRoomID(id))
|
||||||
if err != nil {
|
if is_scrape_failure(err) {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
max_id := scraper.DMMessageID(^uint(0) >> 1)
|
max_id := scraper.DMMessageID(^uint(0) >> 1)
|
||||||
|
@ -50,10 +50,10 @@ func (app *Application) ensure_tweet(id scraper.TweetID, is_forced bool, is_conv
|
|||||||
|
|
||||||
if is_needing_scrape && !app.IsScrapingDisabled {
|
if is_needing_scrape && !app.IsScrapingDisabled {
|
||||||
trove, err := scraper.GetTweetFullAPIV2(id, 50) // TODO: parameterizable
|
trove, err := scraper.GetTweetFullAPIV2(id, 50) // TODO: parameterizable
|
||||||
if err == nil {
|
if err == nil || errors.Is(err, scraper.END_OF_FEED) || errors.Is(err, scraper.ErrRateLimited) {
|
||||||
app.Profile.SaveTweetTrove(trove, false)
|
app.Profile.SaveTweetTrove(trove, false)
|
||||||
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
|
go app.Profile.SaveTweetTrove(trove, true) // Download the content in the background
|
||||||
is_available = true
|
_, is_available = trove.Tweets[id]
|
||||||
} else {
|
} else {
|
||||||
app.ErrorLog.Print(err)
|
app.ErrorLog.Print(err)
|
||||||
// TODO: show error in UI
|
// TODO: show error in UI
|
||||||
|
@ -895,7 +895,7 @@ func (api *API) GetMore(pq PaginatedQuery, response *APIV2Response, count int) e
|
|||||||
for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < count {
|
for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < count {
|
||||||
fresh_response, err := pq.NextPage(api, last_response.GetCursorBottom())
|
fresh_response, err := pq.NextPage(api, last_response.GetCursorBottom())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error getting next page for %#v: %w", pq, err)
|
return fmt.Errorf("error getting next page for %#v: %w", pq, err) // e.g., rate limited
|
||||||
}
|
}
|
||||||
|
|
||||||
if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() && len(fresh_response.GetMainInstruction().Entries) == 0 {
|
if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() && len(fresh_response.GetMainInstruction().Entries) == 0 {
|
||||||
@ -925,25 +925,31 @@ func (api *API) GetPaginatedQuery(pq PaginatedQuery, count int) (TweetTrove, err
|
|||||||
fmt.Printf("Paginating %d count\n", count)
|
fmt.Printf("Paginating %d count\n", count)
|
||||||
api_response, err := pq.NextPage(api, "")
|
api_response, err := pq.NextPage(api, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Any error on the first call (including end of feed) means no results were fetched, so returning an empty trove with the wrapped error is OK
|
||||||
return TweetTrove{}, fmt.Errorf("Error calling API to fetch query %#v:\n %w", pq, err)
|
return TweetTrove{}, fmt.Errorf("Error calling API to fetch query %#v:\n %w", pq, err)
|
||||||
}
|
}
|
||||||
if len(api_response.GetMainInstruction().Entries) < count && api_response.GetCursorBottom() != "" {
|
if len(api_response.GetMainInstruction().Entries) < count && api_response.GetCursorBottom() != "" {
|
||||||
err = api.GetMore(pq, &api_response, count)
|
err = api.GetMore(pq, &api_response, count)
|
||||||
if errors.Is(err, END_OF_FEED) {
|
if errors.Is(err, END_OF_FEED) {
|
||||||
println("End of feed!")
|
log.Infof("End of feed!")
|
||||||
|
} else if errors.Is(err, ErrRateLimited) {
|
||||||
|
log.Errorf("Rate limited!")
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return TweetTrove{}, err
|
return TweetTrove{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trove, err := pq.ToTweetTrove(api_response)
|
trove, err2 := pq.ToTweetTrove(api_response)
|
||||||
if err != nil {
|
if err2 != nil {
|
||||||
return TweetTrove{}, fmt.Errorf("Error parsing the tweet trove for query %#v:\n %w", pq, err)
|
return TweetTrove{}, fmt.Errorf("Error parsing the tweet trove for query %#v:\n %w", pq, err2)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("------------")
|
fmt.Println("------------")
|
||||||
err = trove.PostProcess()
|
err2 = trove.PostProcess()
|
||||||
return trove, err
|
if err2 != nil {
|
||||||
|
return TweetTrove{}, fmt.Errorf("failed to post-process tweet trove: %w", err2)
|
||||||
|
}
|
||||||
|
return trove, err // `err` will be either nil, END_OF_FEED, or ErrRateLimited
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get a User feed using the new GraphQL twitter api
|
// Get a User feed using the new GraphQL twitter api
|
||||||
@ -987,7 +993,6 @@ func (api *API) GetGraphqlFeedFor(user_id UserID, cursor string) (APIV2Response,
|
|||||||
|
|
||||||
var response APIV2Response
|
var response APIV2Response
|
||||||
err = api.do_http(url.String(), cursor, &response)
|
err = api.do_http(url.String(), cursor, &response)
|
||||||
|
|
||||||
return response, err
|
return response, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1107,10 +1112,7 @@ func (api *API) GetUserLikes(user_id UserID, cursor string) (APIV2Response, erro
|
|||||||
|
|
||||||
var response APIV2Response
|
var response APIV2Response
|
||||||
err = api.do_http(url.String(), cursor, &response)
|
err = api.do_http(url.String(), cursor, &response)
|
||||||
if err != nil {
|
return response, err
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
return response, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type PaginatedUserLikes struct {
|
type PaginatedUserLikes struct {
|
||||||
@ -1176,10 +1178,7 @@ func (api *API) GetBookmarks(cursor string) (APIV2Response, error) {
|
|||||||
|
|
||||||
var response APIV2Response
|
var response APIV2Response
|
||||||
err = api.do_http(url.String(), cursor, &response)
|
err = api.do_http(url.String(), cursor, &response)
|
||||||
if err != nil {
|
return response, err
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
return response, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type PaginatedBookmarks struct {
|
type PaginatedBookmarks struct {
|
||||||
@ -1259,12 +1258,9 @@ func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTro
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = api.do_http_POST(url, string(body_bytes), &response)
|
err = api.do_http_POST(url, string(body_bytes), &response)
|
||||||
if err != nil {
|
trove, err2 := response.ToTweetTrove()
|
||||||
panic(err)
|
if err2 != nil {
|
||||||
}
|
return TweetTrove{}, err2
|
||||||
trove, err := response.ToTweetTrove()
|
|
||||||
if err != nil {
|
|
||||||
return TweetTrove{}, err
|
|
||||||
}
|
}
|
||||||
return trove, err
|
return trove, err
|
||||||
}
|
}
|
||||||
@ -1312,11 +1308,7 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
|
|||||||
|
|
||||||
var response UserResponse
|
var response UserResponse
|
||||||
err = api.do_http(url.String(), "", &response)
|
err = api.do_http(url.String(), "", &response)
|
||||||
if err != nil {
|
return response.ConvertToAPIUser(), err
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return response.ConvertToAPIUser(), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api *API) Search(query string, cursor string) (APIV2Response, error) {
|
func (api *API) Search(query string, cursor string) (APIV2Response, error) {
|
||||||
@ -1373,3 +1365,13 @@ func (p PaginatedSearch) NextPage(api *API, cursor string) (APIV2Response, error
|
|||||||
func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
|
func (p PaginatedSearch) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
|
||||||
return r.ToTweetTrove()
|
return r.ToTweetTrove()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Search modes:
|
||||||
|
// - regular ("top")
|
||||||
|
// - latest / "live"
|
||||||
|
// - search for users
|
||||||
|
// - photos
|
||||||
|
// - videos
|
||||||
|
func Search(query string, min_results int) (trove TweetTrove, err error) {
|
||||||
|
return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
|
||||||
|
}
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
package scraper
|
|
||||||
|
|
||||||
func TimestampToDateString(timestamp int) string {
|
|
||||||
panic("???") // TODO
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* TODO: Search modes:
|
|
||||||
* - regular ("top")
|
|
||||||
* - latest / "live"
|
|
||||||
* - search for users
|
|
||||||
* - photos
|
|
||||||
* - videos
|
|
||||||
*/
|
|
||||||
func Search(query string, min_results int) (trove TweetTrove, err error) {
|
|
||||||
return the_api.GetPaginatedQuery(PaginatedSearch{query}, min_results)
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user