Add --delay flag to force a delay between requests in a large paginated scrape

This commit is contained in:
Alessio 2024-08-19 18:20:12 -07:00
parent 9c8c7cdf65
commit c45b8e7ad8
3 changed files with 12 additions and 0 deletions

View File

@@ -8,6 +8,7 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"golang.org/x/term" "golang.org/x/term"
"io/fs" "io/fs"
"time"
"os" "os"
"strconv" "strconv"
"strings" "strings"
@@ -40,6 +41,8 @@ func main() {
how_many := flag.Int("n", 50, "") how_many := flag.Int("n", 50, "")
flag.IntVar(how_many, "number", 50, "") flag.IntVar(how_many, "number", 50, "")
delay := flag.String("delay", "0ms", "")
var default_log_level string var default_log_level string
if version_string == "" { if version_string == "" {
default_log_level = "debug" default_log_level = "debug"
@@ -137,6 +140,10 @@ func main() {
log.Warnf("Unable to initialize guest session! Might be a network issue") log.Warnf("Unable to initialize guest session! Might be a network issue")
} // Don't exit here, some operations don't require a connection } // Don't exit here, some operations don't require a connection
} }
api.Delay, err = time.ParseDuration(*delay)
if err != nil {
die(fmt.Sprintf("Invalid delay: %q", *delay), false, 1)
}
switch operation { switch operation {
case "login": case "login":

View File

@@ -22,6 +22,7 @@ type API struct {
GuestToken string GuestToken string
Client http.Client Client http.Client
CSRFToken string CSRFToken string
Delay time.Duration
} }
type api_outstruct struct { type api_outstruct struct {

View File

@@ -898,6 +898,10 @@ type PaginatedQuery interface {
func (api *API) GetMore(pq PaginatedQuery, response *APIV2Response, count int) error { func (api *API) GetMore(pq PaginatedQuery, response *APIV2Response, count int) error {
last_response := response last_response := response
for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < count { for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < count {
if api.Delay != 0 {
fmt.Printf("Pausing for %s...", api.Delay)
time.Sleep(api.Delay) // Slow down the requests, if applicable
}
fresh_response, err := pq.NextPage(api, last_response.GetCursorBottom()) fresh_response, err := pq.NextPage(api, last_response.GetCursorBottom())
if err != nil { if err != nil {
return fmt.Errorf("error getting next page for %#v: %w", pq, err) // e.g., rate limited return fmt.Errorf("error getting next page for %#v: %w", pq, err) // e.g., rate limited