Recognize deleted users when scraping, and create a user with a fake ID for them
This commit is contained in:
parent
728aaf251f
commit
52370a4f9d
@ -197,10 +197,11 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count #
|
|||||||
|
|
||||||
|
|
||||||
# Test a tweet thread with tombstones
|
# Test a tweet thread with tombstones
|
||||||
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
|
# tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
|
||||||
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
|
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
|
||||||
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0
|
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned
|
||||||
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
||||||
|
|
||||||
|
|
||||||
# Test search
|
# Test search
|
||||||
tw search "from:michaelmalice constitution"
|
tw search "from:michaelmalice constitution"
|
||||||
|
@ -244,6 +244,7 @@ type APIUser struct {
|
|||||||
StatusesCount int `json:"statuses_count"`
|
StatusesCount int `json:"statuses_count"`
|
||||||
Verified bool `json:"verified"`
|
Verified bool `json:"verified"`
|
||||||
IsBanned bool
|
IsBanned bool
|
||||||
|
DoesntExist bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -256,6 +257,7 @@ type UserResponse struct {
|
|||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
Errors []struct {
|
Errors []struct {
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
|
Name string `json:"name"`
|
||||||
Code int `json:"code"`
|
Code int `json:"code"`
|
||||||
} `json:"errors"`
|
} `json:"errors"`
|
||||||
}
|
}
|
||||||
@ -267,6 +269,8 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
|
|||||||
for _, api_error := range u.Errors {
|
for _, api_error := range u.Errors {
|
||||||
if api_error.Message == "Authorization: User has been suspended. (63)" {
|
if api_error.Message == "Authorization: User has been suspended. (63)" {
|
||||||
ret.IsBanned = true
|
ret.IsBanned = true
|
||||||
|
} else if api_error.Name == "NotFoundError" {
|
||||||
|
ret.DoesntExist = true
|
||||||
} else {
|
} else {
|
||||||
panic(fmt.Sprintf("Unknown api error: %q", api_error.Message))
|
panic(fmt.Sprintf("Unknown api error: %q", api_error.Message))
|
||||||
}
|
}
|
||||||
|
1
scraper/test_responses/deleted_user.json
Normal file
1
scraper/test_responses/deleted_user.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"errors":[{"message":"User 'GregCunningham0' not found","locations":[{"line":126,"column":3}],"path":["user"],"extensions":{"name":"NotFoundError","source":"Server","code":50,"kind":"NonFatal","tracing":{"trace_id":"2c6f690015d9f18e"}},"code":50,"kind":"NonFatal","name":"NotFoundError","source":"Server","tracing":{"trace_id":"2c6f690015d9f18e"}}],"data":{}}
|
@ -87,7 +87,14 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if user.ID == 0 {
|
if user.ID == 0 {
|
||||||
panic(fmt.Sprintf("UserID == 0 (@%s)", handle))
|
// Find the first unused ID (counting up from 1) so the user fits into the trove
|
||||||
|
for i := 1; ; i++ {
|
||||||
|
_, ok := trove.Users[UserID(i)]
|
||||||
|
if !ok {
|
||||||
|
user.ID = UserID(i)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trove.Users[user.ID] = user
|
trove.Users[user.ID] = user
|
||||||
@ -108,7 +115,7 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
|
|||||||
func (trove *TweetTrove) FillMissingUserIDs() {
|
func (trove *TweetTrove) FillMissingUserIDs() {
|
||||||
for i := range trove.Tweets {
|
for i := range trove.Tweets {
|
||||||
tweet := trove.Tweets[i]
|
tweet := trove.Tweets[i]
|
||||||
if tweet.UserID != 0 {
|
if tweet.UserHandle == "" {
|
||||||
// No need to fill this tweet's user_id, it's already filled
|
// No need to fill this tweet's user_id, it's already filled
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -115,10 +115,6 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
|
|||||||
JoinDate: time.Unix(0, 0),
|
JoinDate: time.Unix(0, 0),
|
||||||
IsVerified: false,
|
IsVerified: false,
|
||||||
IsPrivate: true,
|
IsPrivate: true,
|
||||||
ProfileImageUrl: DEFAULT_PROFILE_IMAGE_URL,
|
|
||||||
ProfileImageLocalPath: path.Base(DEFAULT_PROFILE_IMAGE_URL),
|
|
||||||
BannerImageUrl: "",
|
|
||||||
BannerImageLocalPath: "",
|
|
||||||
IsNeedingFakeID: true,
|
IsNeedingFakeID: true,
|
||||||
IsIdFake: true,
|
IsIdFake: true,
|
||||||
}
|
}
|
||||||
@ -126,6 +122,14 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
|
|||||||
|
|
||||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||||
|
if apiUser.DoesntExist {
|
||||||
|
// User may have been deleted, or there was a typo. There's no data to parse
|
||||||
|
if apiUser.ScreenName == "" {
|
||||||
|
panic("ScreenName is empty!")
|
||||||
|
}
|
||||||
|
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
||||||
|
return
|
||||||
|
}
|
||||||
ret.ID = UserID(apiUser.ID)
|
ret.ID = UserID(apiUser.ID)
|
||||||
ret.Handle = UserHandle(apiUser.ScreenName)
|
ret.Handle = UserHandle(apiUser.ScreenName)
|
||||||
if apiUser.IsBanned {
|
if apiUser.IsBanned {
|
||||||
|
@ -73,6 +73,37 @@ func TestParseBannedUser(t *testing.T) {
|
|||||||
assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
|
assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should correctly parse a deleted user
|
||||||
|
*/
|
||||||
|
func TestParseDeletedUser(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := ioutil.ReadFile("test_responses/deleted_user.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var user_resp UserResponse
|
||||||
|
err = json.Unmarshal(data, &user_resp)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
handle := "Some Random Deleted User"
|
||||||
|
|
||||||
|
apiUser := user_resp.ConvertToAPIUser()
|
||||||
|
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
|
||||||
|
|
||||||
|
user, err := ParseSingleUser(apiUser)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(UserID(0), user.ID)
|
||||||
|
assert.True(user.IsIdFake)
|
||||||
|
assert.True(user.IsNeedingFakeID)
|
||||||
|
assert.Equal(user.Bio, "<blank>")
|
||||||
|
assert.Equal(user.Handle, UserHandle(handle))
|
||||||
|
|
||||||
|
// Test generation of profile images for deleted user
|
||||||
|
assert.Equal("https://abs.twimg.com/sticky/default_profile_images/default_profile.png", user.GetTinyProfileImageUrl())
|
||||||
|
assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user