diff --git a/cmd/tests.sh b/cmd/tests.sh index cd9ea2d..5acb8f5 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -100,4 +100,9 @@ test $(sqlite3 twitter.db "select count(*) from retweets where retweeted_by != $ test $(($rts_count + $tweet_count_2 - $tweet_count_1)) -gt "50" +# Fetch a privated user +tw fetch_user HbdNrx +test $(sqlite3 twitter.db "select is_private from users where handle = 'HbdNrx'") = "1" + + echo -e "\033[32mAll tests passed. Finished successfully.\033[0m" diff --git a/cmd/twitter/helpers.go b/cmd/twitter/helpers.go index d42a302..581265c 100644 --- a/cmd/twitter/helpers.go +++ b/cmd/twitter/helpers.go @@ -20,10 +20,11 @@ const help_message = `Usage: twitter [--profile ] must not exist. will be ignored if provided. - fetch_user ( is the user handle) + - download_user_content ( is the user handle of the user whomst banner image and profile to download / back up) + - fetch_tweet_only ( is the full URL of the tweet) - get_user_tweets ( is the user handle whomst feed to get) - download_tweet_content ( is the ID of the tweet whomst contents to download / back up) - - download_user_content ( is the user handle of the user whomst banner image and profile to download / back up) : the path to the directory containing the data directories, database files, and settings files. By default, refers to the current directory. Ignored if is "create_profile". ` diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index b9f690a..5656f66 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -143,11 +143,10 @@ func fetch_user_feed(handle string) { tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, 50); if err != nil { - die(err.Error(), false, -2) + die("Error scraping feed: " + err.Error(), false, -2) } for _, u := range users { - fmt.Println(u) err = profile.SaveUser(u) if err != nil { die("Error saving tweet: " + err.Error(), false, 4) @@ -155,7 +154,6 @@ func fetch_user_feed(handle string) { } for _, t := range tweets { - fmt.Println(t) err = profile.SaveTweet(t) if err != nil { die("Error saving tweet: " + err.Error(), false, 4) @@ -163,12 +161,12 @@ func fetch_user_feed(handle string) { } for _, r := range retweets { - fmt.Println(r) err = profile.SaveRetweet(r) if err != nil { die("Error saving retweet: " + err.Error(), false, 4) } } + fmt.Printf("Saved %d tweets, %d retweets and %d users. Exiting successfully\n", len(tweets), len(retweets), len(users)) } diff --git a/scraper/api_request_utils.go b/scraper/api_request_utils.go index 70b75a9..20b7238 100644 --- a/scraper/api_request_utils.go +++ b/scraper/api_request_utils.go @@ -165,24 +165,38 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) { return APIUser{}, err } - resp, err := client.Do(req) - if err != nil { - return APIUser{}, err - } - defer resp.Body.Close() - - if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) { - content, _ := ioutil.ReadAll(resp.Body) - return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content) - } - - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return APIUser{}, err - } - var response UserResponse - err = json.Unmarshal(body, &response) + for retries := 0; retries < 3; retries += 1 { + resp, err := client.Do(req) + if err != nil { + return APIUser{}, err + } + defer resp.Body.Close() + + // Sometimes it randomly gives 403 Forbidden. API's fault, not ours + // We check for this below + if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) { + content, _ := ioutil.ReadAll(resp.Body) + return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content) + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return APIUser{}, err + } + + err = json.Unmarshal(body, &response) + if err != nil { + return APIUser{}, err + } + + if len(response.Errors) == 0 { + break + } + + // Reset the response (remove the Errors) + response = UserResponse{} + } return response.ConvertToAPIUser(), err } diff --git a/scraper/api_types.go b/scraper/api_types.go index 757faf2..1cd57fb 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -143,6 +143,10 @@ type UserResponse struct { Legacy APIUser `json:"legacy"` } `json:"user"` } `json:"data"` + Errors []struct { + Message string `json:"message"` + Code int `json:"code",string` + } `json:"errors"` } func (u UserResponse) ConvertToAPIUser() APIUser { ret := u.Data.User.Legacy