From 32531a3bd95aab094bf4c917839590cf91216e95 Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 23 Dec 2024 18:41:10 -0800 Subject: [PATCH] Move User Detail query, structures, parsing and tests to new 'api_types_user' file --- pkg/scraper/api_types.go | 60 +----- pkg/scraper/api_types_notifications.go | 2 +- pkg/scraper/api_types_test.go | 58 ++++-- pkg/scraper/api_types_user.go | 179 ++++++++++++++++++ .../{user_test.go => api_types_user_test.go} | 68 ++----- pkg/scraper/api_types_v2.go | 118 ------------ 6 files changed, 242 insertions(+), 243 deletions(-) create mode 100644 pkg/scraper/api_types_user.go rename pkg/scraper/{user_test.go => api_types_user_test.go} (61%) diff --git a/pkg/scraper/api_types.go b/pkg/scraper/api_types.go index e47b474..4ea014c 100644 --- a/pkg/scraper/api_types.go +++ b/pkg/scraper/api_types.go @@ -8,8 +8,8 @@ import ( "net/url" "path" "regexp" - "strconv" "slices" + "strconv" "strings" "time" ) @@ -48,7 +48,7 @@ type APIExtendedMedia struct { Type string `json:"type"` VideoInfo struct { Variants []Variant `json:"variants"` - Duration int `json:"duration_millis"` + Duration int `json:"duration_millis"` } `json:"video_info"` ExtMediaAvailability struct { Status string `json:"status"` @@ -674,62 +674,6 @@ type APINotification struct { } `json:"template"` } -type UserResponse struct { - Data struct { - User struct { - Result struct { - MetaTypename string `json:"__typename"` - ID int64 `json:"rest_id,string"` - Legacy APIUser `json:"legacy"` - IsBlueVerified bool `json:"is_blue_verified"` - UnavailableMessage struct { - Text string `json:"text"` - } `json:"unavailable_message"` - Reason string `json:"reason"` - } `json:"result"` - } `json:"user"` - } `json:"data"` - Errors []struct { - Message string `json:"message"` - Name string `json:"name"` - Code int `json:"code"` - } `json:"errors"` -} - -func (u UserResponse) ConvertToAPIUser() (APIUser, error) { - if u.Data.User.Result.MetaTypename == "" { - // Completely empty response (user not found) - return APIUser{}, ErrDoesntExist - } - - ret := u.Data.User.Result.Legacy - ret.ID = u.Data.User.Result.ID - ret.Verified = u.Data.User.Result.IsBlueVerified - - // Banned users - for _, api_error := range u.Errors { - if api_error.Message == "Authorization: User has been suspended. (63)" { - ret.IsBanned = true - } else if api_error.Name == "NotFoundError" { - ret.DoesntExist = true - } else { - panic(fmt.Errorf("Unknown api error %q:\n %w", api_error.Message, EXTERNAL_API_ERROR)) - } - } - - // Banned users, new version - if u.Data.User.Result.Reason == "Suspended" { - ret.IsBanned = true - } - - // Deleted users - if ret.ID == 0 && ret.ScreenName == "" && u.Data.User.Result.Reason != "Suspended" { - ret.DoesntExist = true - } - - return ret, nil -} - type APIv1Entry struct { EntryID string `json:"entryId"` SortIndex int64 `json:"sortIndex,string"` diff --git a/pkg/scraper/api_types_notifications.go b/pkg/scraper/api_types_notifications.go index 406864c..87d7b78 100644 --- a/pkg/scraper/api_types_notifications.go +++ b/pkg/scraper/api_types_notifications.go @@ -5,9 +5,9 @@ import ( "fmt" "net/url" "regexp" + "slices" "strings" "time" - "slices" log "github.com/sirupsen/logrus" ) diff --git a/pkg/scraper/api_types_test.go b/pkg/scraper/api_types_test.go index 44ea95c..4f0b476 100644 --- a/pkg/scraper/api_types_test.go +++ b/pkg/scraper/api_types_test.go @@ -2,9 +2,11 @@ package scraper_test import ( "encoding/json" + "net/http" "os" "testing" + "github.com/jarcoal/httpmock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -59,22 +61,6 @@ func TestNormalizeContent(t *testing.T) { } } -func TestUserProfileToAPIUser(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/michael_malice_user_profile.json") - if err != nil { - panic(err) - } - var user_resp UserResponse - err = json.Unmarshal(data, &user_resp) - assert.NoError(err) - - result, err := user_resp.ConvertToAPIUser() - assert.NoError(err) - assert.Equal(int64(44067298), result.ID) - assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount) -} - func TestGetCursorBottom(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json") @@ -185,3 +171,43 @@ func TestHandleTombstonesUnavailable(t *testing.T) { assert.Equal("unavailable", tombstone.TombstoneText) } } + +// Should extract a user handle from a shortened tweet URL +func TestParseHandleFromShortenedTweetUrl(t *testing.T) { + assert := assert.New(t) + + short_url := "https://t.co/rZVrNGJyDe" + expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529" + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) { + header := http.Header{} + header.Set("Location", expanded_url) + return &http.Response{StatusCode: 301, Header: header}, nil + }) + + // Check the httpmock interceptor is working correctly + require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request") + + result, err := ParseHandleFromTweetUrl(short_url) + require.NoError(t, err) + assert.Equal(UserHandle("MarkSnyderJr1"), result) +} + +// Should compute tiny profile image URLs correctly, and fix local paths if needed (e.g., "_normal" and no file extension) +func TestGetTinyURLs(t *testing.T) { + assert := assert.New(t) + u := User{ + ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg", + Handle: "testUser", + } + assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg") + assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") + + // User with poorly formed profile image URL + u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal" + assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal") + assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") +} diff --git a/pkg/scraper/api_types_user.go b/pkg/scraper/api_types_user.go new file mode 100644 index 0000000..9c96c27 --- /dev/null +++ b/pkg/scraper/api_types_user.go @@ -0,0 +1,179 @@ +package scraper + +import ( + "errors" + "fmt" + "net/url" +) + +type UserResponse struct { + Data struct { + User struct { + Result struct { + MetaTypename string `json:"__typename"` + ID int64 `json:"rest_id,string"` + Legacy APIUser `json:"legacy"` + IsBlueVerified bool `json:"is_blue_verified"` + UnavailableMessage struct { + Text string `json:"text"` + } `json:"unavailable_message"` + Reason string `json:"reason"` + } `json:"result"` + } `json:"user"` + } `json:"data"` + Errors []struct { + Message string `json:"message"` + Name string `json:"name"` + Code int `json:"code"` + } `json:"errors"` +} + +func (u UserResponse) ConvertToAPIUser() (APIUser, error) { + if u.Data.User.Result.MetaTypename == "" { + // Completely empty response (user not found) + return APIUser{}, ErrDoesntExist + } + + ret := u.Data.User.Result.Legacy + ret.ID = u.Data.User.Result.ID + ret.Verified = u.Data.User.Result.IsBlueVerified + + // Banned users + for _, api_error := range u.Errors { + if api_error.Message == "Authorization: User has been suspended. (63)" { + ret.IsBanned = true + } else if api_error.Name == "NotFoundError" { + // TODO: not sure what kind of request returns this + ret.DoesntExist = true + } else { + panic(fmt.Errorf("Unknown api error %q:\n %w", api_error.Message, EXTERNAL_API_ERROR)) + } + } + + // Banned users, new version + if u.Data.User.Result.Reason == "Suspended" { + ret.IsBanned = true + } + + // Deleted users + if ret.ID == 0 && ret.ScreenName == "" && u.Data.User.Result.Reason != "Suspended" { + ret.DoesntExist = true + } + + return ret, nil +} + +func (api API) GetUser(handle UserHandle) (User, error) { + url, err := url.Parse(GraphqlURL{ + BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName", + Variables: GraphqlVariables{ + ScreenName: handle, + Count: 20, + IncludePromotedContent: false, + WithSuperFollowsUserFields: true, + WithDownvotePerspective: false, + WithReactionsMetadata: false, + WithReactionsPerspective: false, + WithSuperFollowsTweetFields: true, + WithBirdwatchNotes: false, + WithVoice: true, + WithV2Timeline: false, + }, + Features: GraphqlFeatures{ + ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true, + VerifiedPhoneLabelEnabled: false, + ResponsiveWebGraphqlTimelineNavigationEnabled: true, + UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true, + TweetypieUnmentionOptimizationEnabled: true, + ResponsiveWebUcGqlEnabled: true, + VibeApiEnabled: true, + ResponsiveWebEditTweetApiEnabled: true, + GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true, + StandardizedNudgesMisinfo: true, + TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false, + InteractiveTextEnabled: true, + ResponsiveWebTextConversationsEnabled: false, + ResponsiveWebEnhanceCardsEnabled: true, + }, + }.String()) + if err != nil { + panic(err) + } + + var response UserResponse + err = api.do_http(url.String(), "", &response) + if err != nil { + return User{}, err + } + apiUser, err := response.ConvertToAPIUser() + if errors.Is(err, ErrDoesntExist) { + return User{}, err + } + if apiUser.ScreenName == "" { + if apiUser.IsBanned || apiUser.DoesntExist { + ret := GetUnknownUserWithHandle(handle) + ret.IsBanned = apiUser.IsBanned + ret.IsDeleted = apiUser.DoesntExist + return ret, nil + } + apiUser.ScreenName = string(handle) + } + if err != nil { + return User{}, fmt.Errorf("Error fetching user %q:\n %w", handle, err) + } + return ParseSingleUser(apiUser) +} + +// Calls API#GetUserByID and returns the parsed result +func GetUserByID(u_id UserID) (User, error) { + session, err := NewGuestSession() // This endpoint works better if you're not logged in + if err != nil { + return User{}, err + } + return session.GetUserByID(u_id) +} + +func (api API) GetUserByID(u_id UserID) (User, error) { + if u_id == UserID(0) { + panic("No Users with ID 0") + } + url, err := url.Parse(GraphqlURL{ + BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId", + Variables: GraphqlVariables{ + UserID: u_id, + }, + Features: GraphqlFeatures{ + RWebTipjarConsumptionEnabled: true, + ResponsiveWebGraphqlExcludeDirectiveEnabled: true, + VerifiedPhoneLabelEnabled: false, + ResponsiveWebGraphqlSkipUserProfileImageExtensionsEnabled: false, + ResponsiveWebGraphqlTimelineNavigationEnabled: true, + SubscriptionsFeatureCanGiftPremium: true, + ResponsiveWebTwitterArticleNotesTabEnabled: true, + }, + }.String()) + if err != nil { + panic(err) + } + + var response UserResponse + err = api.do_http(url.String(), "", &response) + if err != nil { + return User{}, err + } + apiUser, err := response.ConvertToAPIUser() + if errors.Is(err, ErrDoesntExist) { + return User{}, err + } + if apiUser.ScreenName == "" { + if apiUser.IsBanned { + return User{}, ErrUserIsBanned + } else { + return User{}, ErrDoesntExist + } + } + if err != nil { + return User{}, fmt.Errorf("Error fetching user ID %d:\n %w", u_id, err) + } + return ParseSingleUser(apiUser) +} diff --git a/pkg/scraper/user_test.go b/pkg/scraper/api_types_user_test.go similarity index 61% rename from pkg/scraper/user_test.go rename to pkg/scraper/api_types_user_test.go index 38061e7..f766b7e 100644 --- a/pkg/scraper/user_test.go +++ b/pkg/scraper/api_types_user_test.go @@ -2,17 +2,31 @@ package scraper_test import ( "encoding/json" - "net/http" "os" "testing" - "github.com/jarcoal/httpmock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" ) +func TestUserProfileToAPIUser(t *testing.T) { + assert := assert.New(t) + data, err := os.ReadFile("test_responses/michael_malice_user_profile.json") + if err != nil { + panic(err) + } + var user_resp UserResponse + err = json.Unmarshal(data, &user_resp) + assert.NoError(err) + + result, err := user_resp.ConvertToAPIUser() + assert.NoError(err) + assert.Equal(int64(44067298), result.ID) + assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount) +} + func TestParseSingleUser(t *testing.T) { assert := assert.New(t) require := require.New(t) @@ -51,9 +65,7 @@ func TestParseSingleUser(t *testing.T) { assert.Equal(TweetID(1692611652397453790), user.PinnedTweetID) } -/** - * Should correctly parse a banned user - */ +// Should correctly parse a banned user func TestParseBannedUser(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/api_v2/user_suspended.json") @@ -76,9 +88,7 @@ func TestParseBannedUser(t *testing.T) { assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath()) } -/** - * Should correctly parse a deleted user - */ +// Should correctly parse a deleted user func TestParseDeletedUser(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/deleted_user.json") @@ -93,45 +103,3 @@ func TestParseDeletedUser(t *testing.T) { assert.Error(err) assert.ErrorIs(err, ErrDoesntExist) } - -/** - * Should extract a user handle from a shortened tweet URL - */ -func TestParseHandleFromShortenedTweetUrl(t *testing.T) { - assert := assert.New(t) - - short_url := "https://t.co/rZVrNGJyDe" - expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529" - - httpmock.Activate() - defer httpmock.DeactivateAndReset() - - httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) { - header := http.Header{} - header.Set("Location", expanded_url) - return &http.Response{StatusCode: 301, Header: header}, nil - }) - - // Check the httpmock interceptor is working correctly - require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request") - - result, err := ParseHandleFromTweetUrl(short_url) - require.NoError(t, err) - assert.Equal(UserHandle("MarkSnyderJr1"), result) -} - -// Should compute tiny profile image URLs correctly, and fix local paths if needed (e.g., "_normal" and no file extension) -func TestGetTinyURLs(t *testing.T) { - assert := assert.New(t) - u := User{ - ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg", - Handle: "testUser", - } - assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg") - assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") - - // User with poorly formed profile image URL - u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal" - assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal") - assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg") -} diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index 99223bd..7509886 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -1326,124 +1326,6 @@ func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTro return trove, err } -// Get User -// -------- - -func (api API) GetUser(handle UserHandle) (User, error) { - url, err := url.Parse(GraphqlURL{ - BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName", - Variables: GraphqlVariables{ - ScreenName: handle, - Count: 20, - IncludePromotedContent: false, - WithSuperFollowsUserFields: true, - WithDownvotePerspective: false, - WithReactionsMetadata: false, - WithReactionsPerspective: false, - WithSuperFollowsTweetFields: true, - WithBirdwatchNotes: false, - WithVoice: true, - WithV2Timeline: false, - }, - Features: GraphqlFeatures{ - ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true, - VerifiedPhoneLabelEnabled: false, - ResponsiveWebGraphqlTimelineNavigationEnabled: true, - UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true, - TweetypieUnmentionOptimizationEnabled: true, - ResponsiveWebUcGqlEnabled: true, - VibeApiEnabled: true, - ResponsiveWebEditTweetApiEnabled: true, - GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true, - StandardizedNudgesMisinfo: true, - TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false, - InteractiveTextEnabled: true, - ResponsiveWebTextConversationsEnabled: false, - ResponsiveWebEnhanceCardsEnabled: true, - }, - }.String()) - if err != nil { - panic(err) - } - - var response UserResponse - err = api.do_http(url.String(), "", &response) - if err != nil { - return User{}, err - } - apiUser, err := response.ConvertToAPIUser() - if errors.Is(err, ErrDoesntExist) { - return User{}, err - } - if apiUser.ScreenName == "" { - if apiUser.IsBanned || apiUser.DoesntExist { - ret := GetUnknownUserWithHandle(handle) - ret.IsBanned = apiUser.IsBanned - ret.IsDeleted = apiUser.DoesntExist - return ret, nil - } - apiUser.ScreenName = string(handle) - } - if err != nil { - return User{}, fmt.Errorf("Error fetching user %q:\n %w", handle, err) - } - return ParseSingleUser(apiUser) -} - -// Calls API#GetUserByID and returns the parsed result -func GetUserByID(u_id UserID) (User, error) { - session, err := NewGuestSession() // This endpoint works better if you're not logged in - if err != nil { - return User{}, err - } - return session.GetUserByID(u_id) -} - -func (api API) GetUserByID(u_id UserID) (User, error) { - if u_id == UserID(0) { - panic("No Users with ID 0") - } - url, err := url.Parse(GraphqlURL{ - BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId", - Variables: GraphqlVariables{ - UserID: u_id, - }, - Features: GraphqlFeatures{ - RWebTipjarConsumptionEnabled: true, - ResponsiveWebGraphqlExcludeDirectiveEnabled: true, - VerifiedPhoneLabelEnabled: false, - ResponsiveWebGraphqlSkipUserProfileImageExtensionsEnabled: false, - ResponsiveWebGraphqlTimelineNavigationEnabled: true, - SubscriptionsFeatureCanGiftPremium: true, - ResponsiveWebTwitterArticleNotesTabEnabled: true, - }, - }.String()) - if err != nil { - panic(err) - } - - var response UserResponse - err = api.do_http(url.String(), "", &response) - if err != nil { - return User{}, err - } - apiUser, err := response.ConvertToAPIUser() - if errors.Is(err, ErrDoesntExist) { - return User{}, err - } - if apiUser.ScreenName == "" { - if apiUser.IsBanned { - return User{}, ErrUserIsBanned - } else { - return User{}, ErrDoesntExist - } - } - if err != nil { - return User{}, fmt.Errorf("Error fetching user ID %d:\n %w", u_id, err) - } - return ParseSingleUser(apiUser) -} - // Paginated Search // ----------------