Allow fetching of banned users

This commit is contained in:
Alessio 2022-01-07 13:40:22 -05:00
parent fb174df363
commit 6eac56183d
4 changed files with 59 additions and 35 deletions

View File

@ -207,6 +207,11 @@ test "$(sqlite3 twitter.db "select choice1, choice2, choice3, choice4 from polls
test "$(sqlite3 twitter.db "select choice1_votes, choice2_votes, choice3_votes, choice4_votes from polls where tweet_id = 1465534109573390348")" = "1593|624|778|1138" test "$(sqlite3 twitter.db "select choice1_votes, choice2_votes, choice3_votes, choice4_votes from polls where tweet_id = 1465534109573390348")" = "1593|624|778|1138"
# Test fetching a banned user
# Before the fetch, the user's content must not be marked as downloaded.
# The command substitution is quoted (like the other tests in this script) so an
# empty query result is compared as an empty string rather than making `test` malformed.
test "$(sqlite3 twitter.db "select is_content_downloaded from users where handle='kanesays23'")" = "0"
tw fetch_user kanesays23
# After the fetch, the row must have both is_content_downloaded and is_banned set.
test "$(sqlite3 twitter.db "select is_content_downloaded, is_banned from users where handle='kanesays23'")" = "1|1"
# TODO: Maybe this file should be broken up into multiple test scripts # TODO: Maybe this file should be broken up into multiple test scripts
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m" echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"

View File

@ -147,8 +147,18 @@ func (p Profile) DownloadUserContentFor(u *scraper.User) error {
* Enable injecting a custom MediaDownloader (i.e., for testing) * Enable injecting a custom MediaDownloader (i.e., for testing)
*/ */
func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error { func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
outfile := path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath) var outfile string
err := downloader.Curl(u.ProfileImageUrl, outfile) var target_url string
if u.ProfileImageUrl == "" {
outfile = path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL))
target_url = scraper.DEFAULT_PROFILE_IMAGE_URL
} else {
outfile = path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
target_url = u.ProfileImageUrl
}
err := downloader.Curl(target_url, outfile)
if err != nil { if err != nil {
return err return err
} }

View File

@ -152,6 +152,8 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
/** /**
* Returns `true` if content download is needed, `false` otherwise * Returns `true` if content download is needed, `false` otherwise
* *
* If the user is banned, returns false because downloading will be impossible.
*
* If: * If:
* - the user isn't in the DB at all (first time scraping), OR * - the user isn't in the DB at all (first time scraping), OR
* - `is_content_downloaded` is false in the DB, OR * - `is_content_downloaded` is false in the DB, OR
@ -162,6 +164,10 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
* why the No Worsening Principle is needed. * why the No Worsening Principle is needed.
*/ */
func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool { func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
if user.IsBanned {
// Check `is_banned` on the live user, since he may have been un-banned since last scraped
return false
}
row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID) row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
var is_content_downloaded bool var is_content_downloaded bool

View File

@ -173,49 +173,52 @@ func UpdateQueryCursor(req *http.Request, new_cursor string, is_tweet bool) {
func (api API) GetUser(handle UserHandle) (APIUser, error) { func (api API) GetUser(handle UserHandle) (APIUser, error) {
client := &http.Client{Timeout: 10 * time.Second} client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil) req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
if err != nil { if err != nil {
return APIUser{}, err return APIUser{}, err
} }
err = ApiRequestAddTokens(req) err = ApiRequestAddTokens(req)
if err != nil { if err != nil {
return APIUser{}, err return APIUser{}, err
} }
var response UserResponse var response UserResponse
for retries := 0; retries < 3; retries += 1 { for retries := 0; retries < 3; retries += 1 {
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return APIUser{}, err return APIUser{}, err
} }
defer resp.Body.Close() defer resp.Body.Close()
// Sometimes it randomly gives 403 Forbidden. API's fault, not ours // Sometimes it randomly gives 403 Forbidden. API's fault, not ours
// We check for this below // We check for this below
if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) { if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) {
content, _ := ioutil.ReadAll(resp.Body) content, _ := ioutil.ReadAll(resp.Body)
return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content) return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content)
} }
body, err := ioutil.ReadAll(resp.Body) body, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
return APIUser{}, err return APIUser{}, err
} }
err = json.Unmarshal(body, &response) err = json.Unmarshal(body, &response)
if err != nil { if err != nil {
return APIUser{}, err return APIUser{}, err
} }
if len(response.Errors) == 0 { // Retry ONLY if the error is code 50 (random authentication failure), NOT on real errors
break if len(response.Errors) == 1 && response.Errors[0].Code == 50 {
} // Reset the response (remove the Errors)
response = UserResponse{}
// Reset the response (remove the Errors) continue
response = UserResponse{} } else {
} // Do not retry on real errors
return response.ConvertToAPIUser(), err break
}
}
return response.ConvertToAPIUser(), err
} }
func (api API) Search(query string, cursor string) (TweetResponse, error) { func (api API) Search(query string, cursor string) (TweetResponse, error) {