# Review summary. Everything before the first "diff --git" line is commentary, not patch content.
#
# What this patch does:
#   * cmd/tests.sh: comments out the tombstone-thread test; the added TODO says the account got banned.
#   * scraper/api_types.go: adds APIUser.DoesntExist and a Name field on UserResponse errors.
#     ConvertToAPIUser sets DoesntExist for an error named "NotFoundError" and still panics on any
#     other unrecognised error.
#   * scraper/test_responses/deleted_user.json: new fixture holding a NotFoundError (code 50) response.
#   * scraper/tweet_trove.go: FetchTombstoneUsers assigns an ID instead of panicking when user.ID == 0;
#     FillMissingUserIDs now skips tweets with an empty UserHandle rather than a nonzero UserID.
#   * scraper/user.go: GetUnknownUserWithHandle no longer sets the profile/banner image fields;
#     ParseSingleUser returns GetUnknownUserWithHandle(...) for DoesntExist users and panics if
#     ScreenName is empty.
#   * scraper/user_test.go: adds TestParseDeletedUser.
#
# NOTE(review): the "random ID" loop in FetchTombstoneUsers picks the lowest positive UserID that is
#   not yet a key of trove.Users; it is deterministic, not random.
# NOTE(review): the unchanged comment "it's already filled" now sits under the UserHandle == "" check;
#   confirm it still describes the skip condition.
# NOTE(review): line breaks were restored so that every hunk matches the old/new counts in its header.
#   Indentation, column alignment and the exact position of blank context lines are a best guess;
#   apply with `git apply --ignore-whitespace` or regenerate the patch -- TODO confirm.
#
diff --git a/cmd/tests.sh b/cmd/tests.sh
index 3352c5e..8218e1a 100755
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -197,10 +197,11 @@ test $(find link_preview_images | wc -l) = $initial_link_preview_images_count #
 
 
 # Test a tweet thread with tombstones
-tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
-test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
-test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0
-test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
+# tw fetch_tweet https://twitter.com/CovfefeAnon/status/1454526270809726977
+# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454515503242829830") = 1
+# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454521424144654344") = 0 # TODO this guy got banned
+# test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
+
 
 # Test search
 tw search "from:michaelmalice constitution"
diff --git a/scraper/api_types.go b/scraper/api_types.go
index bbe4893..f9a0334 100644
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -244,6 +244,7 @@ type APIUser struct {
 	StatusesCount int `json:"statuses_count"`
 	Verified bool `json:"verified"`
 	IsBanned bool
+	DoesntExist bool
 }
 
 
@@ -256,6 +257,7 @@ type UserResponse struct {
 	} `json:"data"`
 	Errors []struct {
 		Message string `json:"message"`
+		Name string `json:"name"`
 		Code int `json:"code"`
 	} `json:"errors"`
 }
@@ -267,6 +269,8 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
 	for _, api_error := range u.Errors {
 		if api_error.Message == "Authorization: User has been suspended. (63)" {
 			ret.IsBanned = true
+		} else if api_error.Name == "NotFoundError" {
+			ret.DoesntExist = true
 		} else {
 			panic(fmt.Sprintf("Unknown api error: %q", api_error.Message))
 		}
diff --git a/scraper/test_responses/deleted_user.json b/scraper/test_responses/deleted_user.json
new file mode 100644
index 0000000..d50c55e
--- /dev/null
+++ b/scraper/test_responses/deleted_user.json
@@ -0,0 +1 @@
+{"errors":[{"message":"User 'GregCunningham0' not found","locations":[{"line":126,"column":3}],"path":["user"],"extensions":{"name":"NotFoundError","source":"Server","code":50,"kind":"NonFatal","tracing":{"trace_id":"2c6f690015d9f18e"}},"code":50,"kind":"NonFatal","name":"NotFoundError","source":"Server","tracing":{"trace_id":"2c6f690015d9f18e"}}],"data":{}}
diff --git a/scraper/tweet_trove.go b/scraper/tweet_trove.go
index 02acd65..1dab95a 100644
--- a/scraper/tweet_trove.go
+++ b/scraper/tweet_trove.go
@@ -87,7 +87,14 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
 		}
 
 		if user.ID == 0 {
-			panic(fmt.Sprintf("UserID == 0 (@%s)", handle))
+			// Find some random ID to fit it into the trove
+			for i := 1; ; i++ {
+				_, ok := trove.Users[UserID(i)]
+				if !ok {
+					user.ID = UserID(i)
+					break
+				}
+			}
 		}
 
 		trove.Users[user.ID] = user
@@ -108,7 +115,7 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
 func (trove *TweetTrove) FillMissingUserIDs() {
 	for i := range trove.Tweets {
 		tweet := trove.Tweets[i]
-		if tweet.UserID != 0 {
+		if tweet.UserHandle == "" {
 			// No need to fill this tweet's user_id, it's already filled
 			continue
 		}
diff --git a/scraper/user.go b/scraper/user.go
index 0d43fb9..84ef713 100644
--- a/scraper/user.go
+++ b/scraper/user.go
@@ -115,10 +115,6 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
 		JoinDate: time.Unix(0, 0),
 		IsVerified: false,
 		IsPrivate: true,
-		ProfileImageUrl: DEFAULT_PROFILE_IMAGE_URL,
-		ProfileImageLocalPath: path.Base(DEFAULT_PROFILE_IMAGE_URL),
-		BannerImageUrl: "",
-		BannerImageLocalPath: "",
 		IsNeedingFakeID: true,
 		IsIdFake: true,
 	}
@@ -126,6 +122,14 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
 
 // Turn an APIUser, as returned from the scraper, into a properly structured User object
 func ParseSingleUser(apiUser APIUser) (ret User, err error) {
+	if apiUser.DoesntExist {
+		// User may have been deleted, or there was a typo. There's no data to parse
+		if apiUser.ScreenName == "" {
+			panic("ScreenName is empty!")
+		}
+		ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
+		return
+	}
 	ret.ID = UserID(apiUser.ID)
 	ret.Handle = UserHandle(apiUser.ScreenName)
 	if apiUser.IsBanned {
diff --git a/scraper/user_test.go b/scraper/user_test.go
index 9abb651..e00bee2 100644
--- a/scraper/user_test.go
+++ b/scraper/user_test.go
@@ -73,6 +73,37 @@ func TestParseBannedUser(t *testing.T) {
 	assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
 }
 
+/**
+ * Should correctly parse a deleted user
+ */
+func TestParseDeletedUser(t *testing.T) {
+	assert := assert.New(t)
+	data, err := ioutil.ReadFile("test_responses/deleted_user.json")
+	if err != nil {
+		panic(err)
+	}
+	var user_resp UserResponse
+	err = json.Unmarshal(data, &user_resp)
+	require.NoError(t, err)
+
+	handle := "Some Random Deleted User"
+
+	apiUser := user_resp.ConvertToAPIUser()
+	apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
+
+	user, err := ParseSingleUser(apiUser)
+	require.NoError(t, err)
+	assert.Equal(UserID(0), user.ID)
+	assert.True(user.IsIdFake)
+	assert.True(user.IsNeedingFakeID)
+	assert.Equal(user.Bio, "")
+	assert.Equal(user.Handle, UserHandle(handle))
+
+	// Test generation of profile images for deleted user
+	assert.Equal("https://abs.twimg.com/sticky/default_profile_images/default_profile.png", user.GetTinyProfileImageUrl())
+	assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
+}
+
 /**
  * Should extract a user handle from a tweet URL, or fail if URL is invalid
  */