Allow fetching of banned users

This commit is contained in:
Alessio 2022-01-07 13:40:22 -05:00
parent fb174df363
commit 6eac56183d
4 changed files with 59 additions and 35 deletions

View File

@ -207,6 +207,11 @@ test "$(sqlite3 twitter.db "select choice1, choice2, choice3, choice4 from polls
test "$(sqlite3 twitter.db "select choice1_votes, choice2_votes, choice3_votes, choice4_votes from polls where tweet_id = 1465534109573390348")" = "1593|624|778|1138"
# Test fetching a banned user
# Before the fetch, the row for this handle must report is_content_downloaded = 0.
# The command substitution is quoted (like the other checks in this script) so that empty or
# multi-word output is compared as a single string instead of producing a malformed `test`.
test "$(sqlite3 twitter.db "select is_content_downloaded from users where handle='kanesays23'")" = "0"
tw fetch_user kanesays23
# After the fetch, the row must report is_content_downloaded = 1 and is_banned = 1.
test "$(sqlite3 twitter.db "select is_content_downloaded, is_banned from users where handle='kanesays23'")" = "1|1"
# TODO: Maybe this file should be broken up into multiple test scripts
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"

View File

@ -147,8 +147,18 @@ func (p Profile) DownloadUserContentFor(u *scraper.User) error {
* Enable injecting a custom MediaDownloader (i.e., for testing)
*/
func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
outfile := path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
err := downloader.Curl(u.ProfileImageUrl, outfile)
var outfile string
var target_url string
if u.ProfileImageUrl == "" {
outfile = path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL))
target_url = scraper.DEFAULT_PROFILE_IMAGE_URL
} else {
outfile = path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
target_url = u.ProfileImageUrl
}
err := downloader.Curl(target_url, outfile)
if err != nil {
return err
}

View File

@ -152,6 +152,8 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
/**
* Returns `true` if content download is needed, `false` otherwise
*
* If the user is banned, returns false because downloading will be impossible.
*
* If:
* - the user isn't in the DB at all (first time scraping), OR
* - `is_content_downloaded` is false in the DB, OR
@ -162,6 +164,10 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
* why the No Worsening Principle is needed.
*/
func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
if user.IsBanned {
// Check `is_banned` on the live user, since he may have been un-banned since last scraped
return false
}
row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
var is_content_downloaded bool

View File

@ -173,49 +173,52 @@ func UpdateQueryCursor(req *http.Request, new_cursor string, is_tweet bool) {
/**
 * Fetch the profile for `handle` from the `UserByScreenName` GraphQL endpoint and return it
 * converted to an `APIUser`.
 *
 * The request is attempted up to 3 times.  Responses with status 200 or 403 are parsed as JSON
 * into a `UserResponse`; any other status is returned as an error that includes the response body.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were stripped: most statements
 * appear twice in a row, and two different retry rules are visible (stop when
 * `len(response.Errors) == 0`, and further down "retry only on error code 50").  Confirm which
 * lines are live before relying on this text.
 */
func (api API) GetUser(handle UserHandle) (APIUser, error) {
// 10-second overall timeout for each HTTP attempt
client := &http.Client{Timeout: 10 * time.Second}
// NOTE(review): `handle` is concatenated into the query string as-is, with no URL escaping
req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
if err != nil {
return APIUser{}, err
}
client := &http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
if err != nil {
return APIUser{}, err
}
err = ApiRequestAddTokens(req)
if err != nil {
return APIUser{}, err
}
var response UserResponse
var response UserResponse
// At most 3 attempts; the same `req` is re-sent on each one
for retries := 0; retries < 3; retries += 1 {
// `resp` and `err` are new variables scoped to the loop body (they shadow the outer `err`)
resp, err := client.Do(req)
if err != nil {
return APIUser{}, err
}
// NOTE(review): deferred inside the loop, so every attempt's body stays open until GetUser returns
defer resp.Body.Close()
resp, err := client.Do(req)
if err != nil {
return APIUser{}, err
}
defer resp.Body.Close()
// Sometimes it randomly gives 403 Forbidden. API's fault, not ours
// We check for this below
if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) {
content, _ := ioutil.ReadAll(resp.Body)
return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content)
}
// Sometimes it randomly gives 403 Forbidden. API's fault, not ours
// We check for this below
if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) {
content, _ := ioutil.ReadAll(resp.Body)
return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return APIUser{}, err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return APIUser{}, err
}
// Decode into the outer `response` so the result is still available after the loop
err = json.Unmarshal(body, &response)
if err != nil {
return APIUser{}, err
}
err = json.Unmarshal(body, &response)
if err != nil {
return APIUser{}, err
}
// Retry rule, first variant: stop as soon as the response carries no errors
if len(response.Errors) == 0 {
break
}
// Reset the response (remove the Errors)
response = UserResponse{}
}
return response.ConvertToAPIUser(), err
// Retry rule, second variant:
// Retry ONLY if the error is code 50 (random authentication failure), NOT on real errors
if len(response.Errors) == 1 && response.Errors[0].Code == 50 {
// Reset the response (remove the Errors)
// NOTE(review): if the last attempt also ends here, an empty UserResponse is what gets converted below
response = UserResponse{}
continue
} else {
// Do not retry on real errors
break
}
}
// NOTE(review): this `err` is the outer variable, which is nil at this point; errors from inside
// the loop are either returned there or live in the loop-scoped `err`
return response.ConvertToAPIUser(), err
}
func (api API) Search(query string, cursor string) (TweetResponse, error) {