Move User Detail query, structures, parsing and tests to new 'api_types_user' file

This commit is contained in:
Alessio 2024-12-23 18:41:10 -08:00
parent 14024f550d
commit 32531a3bd9
6 changed files with 242 additions and 243 deletions

View File

@ -8,8 +8,8 @@ import (
"net/url"
"path"
"regexp"
"strconv"
"slices"
"strconv"
"strings"
"time"
)
@ -48,7 +48,7 @@ type APIExtendedMedia struct {
Type string `json:"type"`
VideoInfo struct {
Variants []Variant `json:"variants"`
Duration int `json:"duration_millis"`
Duration int `json:"duration_millis"`
} `json:"video_info"`
ExtMediaAvailability struct {
Status string `json:"status"`
@ -674,62 +674,6 @@ type APINotification struct {
} `json:"template"`
}
// UserResponse mirrors the JSON body of a user-detail GraphQL response.
// GetUser and GetUserByID hand a UserResponse to do_http to be filled in, then call
// ConvertToAPIUser on it to flatten it into an APIUser.
type UserResponse struct {
Data struct {
User struct {
Result struct {
// An empty typename is how ConvertToAPIUser recognizes a response with no user in it
MetaTypename string `json:"__typename"`
// "rest_id" arrives as a JSON string; the ",string" option decodes it into an integer
ID int64 `json:"rest_id,string"`
// The bulk of the profile; ConvertToAPIUser uses it as the base of its return value
Legacy APIUser `json:"legacy"`
// Copied onto APIUser.Verified by ConvertToAPIUser
IsBlueVerified bool `json:"is_blue_verified"`
UnavailableMessage struct {
Text string `json:"text"`
} `json:"unavailable_message"`
// ConvertToAPIUser compares this against "Suspended" to detect banned users
Reason string `json:"reason"`
} `json:"result"`
} `json:"user"`
} `json:"data"`
// Top-level API errors; ConvertToAPIUser inspects Message and Name to flag banned / missing users
Errors []struct {
Message string `json:"message"`
Name string `json:"name"`
Code int `json:"code"`
} `json:"errors"`
}
// ConvertToAPIUser flattens the response into an APIUser.
//
// The "legacy" object is used as the base; the ID and blue-verified flag are copied onto it
// from the enclosing result.  `IsBanned` and `DoesntExist` are derived from the response's
// error list and from the result's "reason" field.
//
// Returns ErrDoesntExist if the result has no typename (i.e., the response is empty).
// Panics on any API error that isn't a recognized "suspended" or "not found" error.
func (u UserResponse) ConvertToAPIUser() (APIUser, error) {
	result := u.Data.User.Result

	// Completely empty response (user not found)
	if result.MetaTypename == "" {
		return APIUser{}, ErrDoesntExist
	}

	api_user := result.Legacy
	api_user.ID = result.ID
	api_user.Verified = result.IsBlueVerified

	// Banned users
	for _, e := range u.Errors {
		switch {
		case e.Message == "Authorization: User has been suspended. (63)":
			api_user.IsBanned = true
		case e.Name == "NotFoundError":
			api_user.DoesntExist = true
		default:
			panic(fmt.Errorf("Unknown api error %q:\n %w", e.Message, EXTERNAL_API_ERROR))
		}
	}

	// Banned users, new version
	is_suspended := result.Reason == "Suspended"
	if is_suspended {
		api_user.IsBanned = true
	}

	// Deleted users: nothing identifying came back, and it's not because of a suspension
	if !is_suspended && api_user.ID == 0 && api_user.ScreenName == "" {
		api_user.DoesntExist = true
	}
	return api_user, nil
}
type APIv1Entry struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`

View File

@ -5,9 +5,9 @@ import (
"fmt"
"net/url"
"regexp"
"slices"
"strings"
"time"
"slices"
log "github.com/sirupsen/logrus"
)

View File

@ -2,9 +2,11 @@ package scraper_test
import (
"encoding/json"
"net/http"
"os"
"testing"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -59,22 +61,6 @@ func TestNormalizeContent(t *testing.T) {
}
}
func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil {
panic(err)
}
var user_resp UserResponse
err = json.Unmarshal(data, &user_resp)
assert.NoError(err)
result, err := user_resp.ConvertToAPIUser()
assert.NoError(err)
assert.Equal(int64(44067298), result.ID)
assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount)
}
func TestGetCursorBottom(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
@ -185,3 +171,43 @@ func TestHandleTombstonesUnavailable(t *testing.T) {
assert.Equal("unavailable", tombstone.TombstoneText)
}
}
// Should extract a user handle from a shortened tweet URL
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
assert := assert.New(t)
short_url := "https://t.co/rZVrNGJyDe"
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
httpmock.Activate()
defer httpmock.DeactivateAndReset()
httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) {
header := http.Header{}
header.Set("Location", expanded_url)
return &http.Response{StatusCode: 301, Header: header}, nil
})
// Check the httpmock interceptor is working correctly
require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request")
result, err := ParseHandleFromTweetUrl(short_url)
require.NoError(t, err)
assert.Equal(UserHandle("MarkSnyderJr1"), result)
}
// Should compute tiny profile image URLs correctly, and fix local paths if needed (e.g., "_normal" and no file extension)
func TestGetTinyURLs(t *testing.T) {
	assert := assert.New(t)

	u := User{
		ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg",
		Handle: "testUser",
	}
	// Expected value goes first, so that a failure reports "expected" and "actual" the right way round
	assert.Equal("https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg", u.GetTinyProfileImageUrl())
	assert.Equal("testUser_profile_iwRReicO_normal.jpg", u.GetTinyProfileImageLocalPath())

	// User with poorly formed profile image URL
	u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal"
	assert.Equal("https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal", u.GetTinyProfileImageUrl())
	assert.Equal("testUser_profile_iwRReicO_normal.jpg", u.GetTinyProfileImageLocalPath())
}

View File

@ -0,0 +1,179 @@
package scraper
import (
"errors"
"fmt"
"net/url"
)
// UserResponse mirrors the JSON body of a user-detail GraphQL response.
// GetUser and GetUserByID hand a UserResponse to do_http to be filled in, then call
// ConvertToAPIUser on it to flatten it into an APIUser.
type UserResponse struct {
Data struct {
User struct {
Result struct {
// An empty typename is how ConvertToAPIUser recognizes a response with no user in it
MetaTypename string `json:"__typename"`
// "rest_id" arrives as a JSON string; the ",string" option decodes it into an integer
ID int64 `json:"rest_id,string"`
// The bulk of the profile; ConvertToAPIUser uses it as the base of its return value
Legacy APIUser `json:"legacy"`
// Copied onto APIUser.Verified by ConvertToAPIUser
IsBlueVerified bool `json:"is_blue_verified"`
UnavailableMessage struct {
Text string `json:"text"`
} `json:"unavailable_message"`
// ConvertToAPIUser compares this against "Suspended" to detect banned users
Reason string `json:"reason"`
} `json:"result"`
} `json:"user"`
} `json:"data"`
// Top-level API errors; ConvertToAPIUser inspects Message and Name to flag banned / missing users
Errors []struct {
Message string `json:"message"`
Name string `json:"name"`
Code int `json:"code"`
} `json:"errors"`
}
// ConvertToAPIUser flattens the response into an APIUser.
//
// The "legacy" object is used as the base; the ID and blue-verified flag are copied onto it
// from the enclosing result.  `IsBanned` and `DoesntExist` are derived from the response's
// error list and from the result's "reason" field.
//
// Returns ErrDoesntExist if the result has no typename (i.e., the response is empty).
// Panics on any API error that isn't a recognized "suspended" or "not found" error.
func (u UserResponse) ConvertToAPIUser() (APIUser, error) {
	result := u.Data.User.Result

	// Completely empty response (user not found)
	if result.MetaTypename == "" {
		return APIUser{}, ErrDoesntExist
	}

	api_user := result.Legacy
	api_user.ID = result.ID
	api_user.Verified = result.IsBlueVerified

	// Banned users
	for _, e := range u.Errors {
		switch {
		case e.Message == "Authorization: User has been suspended. (63)":
			api_user.IsBanned = true
		case e.Name == "NotFoundError":
			// TODO: not sure what kind of request returns this
			api_user.DoesntExist = true
		default:
			panic(fmt.Errorf("Unknown api error %q:\n %w", e.Message, EXTERNAL_API_ERROR))
		}
	}

	// Banned users, new version
	is_suspended := result.Reason == "Suspended"
	if is_suspended {
		api_user.IsBanned = true
	}

	// Deleted users: nothing identifying came back, and it's not because of a suspension
	if !is_suspended && api_user.ID == 0 && api_user.ScreenName == "" {
		api_user.DoesntExist = true
	}
	return api_user, nil
}
// GetUser looks up a user by handle via the "UserByScreenName" GraphQL query.
//
// Returns:
//   - whatever error `do_http` reports, unchanged
//   - ErrDoesntExist (unwrapped) if the response contains no user result at all
//   - any other conversion error, wrapped with the handle for context
//   - a placeholder from GetUnknownUserWithHandle, with `IsBanned` / `IsDeleted` set, if the
//     response has no screen name and marks the user as banned or nonexistent
//   - otherwise, the result of ParseSingleUser
//
// Panics if the request URL can't be parsed.
func (api API) GetUser(handle UserHandle) (User, error) {
	// Named `endpoint` rather than `url` so the "net/url" package isn't shadowed
	endpoint, err := url.Parse(GraphqlURL{
		BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName",
		Variables: GraphqlVariables{
			ScreenName: handle,
			Count: 20,
			IncludePromotedContent: false,
			WithSuperFollowsUserFields: true,
			WithDownvotePerspective: false,
			WithReactionsMetadata: false,
			WithReactionsPerspective: false,
			WithSuperFollowsTweetFields: true,
			WithBirdwatchNotes: false,
			WithVoice: true,
			WithV2Timeline: false,
		},
		Features: GraphqlFeatures{
			ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true,
			VerifiedPhoneLabelEnabled: false,
			ResponsiveWebGraphqlTimelineNavigationEnabled: true,
			UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true,
			TweetypieUnmentionOptimizationEnabled: true,
			ResponsiveWebUcGqlEnabled: true,
			VibeApiEnabled: true,
			ResponsiveWebEditTweetApiEnabled: true,
			GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true,
			StandardizedNudgesMisinfo: true,
			TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
			InteractiveTextEnabled: true,
			ResponsiveWebTextConversationsEnabled: false,
			ResponsiveWebEnhanceCardsEnabled: true,
		},
	}.String())
	if err != nil {
		panic(err)
	}

	var response UserResponse
	err = api.do_http(endpoint.String(), "", &response)
	if err != nil {
		return User{}, err
	}

	apiUser, err := response.ConvertToAPIUser()
	if errors.Is(err, ErrDoesntExist) {
		return User{}, err
	}
	// Check the error before touching `apiUser`: on failure its contents are meaningless, and
	// the banned/deleted early return below would otherwise swallow the error
	if err != nil {
		return User{}, fmt.Errorf("Error fetching user %q:\n %w", handle, err)
	}

	if apiUser.ScreenName == "" {
		if apiUser.IsBanned || apiUser.DoesntExist {
			// No profile data came back; return a placeholder that records why
			ret := GetUnknownUserWithHandle(handle)
			ret.IsBanned = apiUser.IsBanned
			ret.IsDeleted = apiUser.DoesntExist
			return ret, nil
		}
		// Fall back to the handle that was asked for
		apiUser.ScreenName = string(handle)
	}
	return ParseSingleUser(apiUser)
}
// GetUserByID opens a fresh guest session and delegates to API#GetUserByID on it,
// returning the parsed result.  If the guest session can't be created, that error is returned.
func GetUserByID(u_id UserID) (User, error) {
	// This endpoint works better if you're not logged in
	guest_api, session_err := NewGuestSession()
	if session_err != nil {
		return User{}, session_err
	}
	return guest_api.GetUserByID(u_id)
}
// GetUserByID looks up a user by numeric ID via the "UserByRestId" GraphQL query.
//
// Returns:
//   - whatever error `do_http` reports, unchanged
//   - ErrDoesntExist (unwrapped) if the response contains no user result at all
//   - any other conversion error, wrapped with the user ID for context
//   - ErrUserIsBanned if the response has no screen name and marks the user as banned
//   - ErrDoesntExist if the response has no screen name for any other reason
//   - otherwise, the result of ParseSingleUser
//
// Panics if `u_id` is 0, or if the request URL can't be parsed.
func (api API) GetUserByID(u_id UserID) (User, error) {
	if u_id == UserID(0) {
		panic("No Users with ID 0")
	}

	// Named `endpoint` rather than `url` so the "net/url" package isn't shadowed
	endpoint, err := url.Parse(GraphqlURL{
		BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId",
		Variables: GraphqlVariables{
			UserID: u_id,
		},
		Features: GraphqlFeatures{
			RWebTipjarConsumptionEnabled: true,
			ResponsiveWebGraphqlExcludeDirectiveEnabled: true,
			VerifiedPhoneLabelEnabled: false,
			ResponsiveWebGraphqlSkipUserProfileImageExtensionsEnabled: false,
			ResponsiveWebGraphqlTimelineNavigationEnabled: true,
			SubscriptionsFeatureCanGiftPremium: true,
			ResponsiveWebTwitterArticleNotesTabEnabled: true,
		},
	}.String())
	if err != nil {
		panic(err)
	}

	var response UserResponse
	err = api.do_http(endpoint.String(), "", &response)
	if err != nil {
		return User{}, err
	}

	apiUser, err := response.ConvertToAPIUser()
	if errors.Is(err, ErrDoesntExist) {
		return User{}, err
	}
	// Check the error before touching `apiUser`: otherwise an unrelated failure would hit the
	// empty-screen-name branch below and be misreported as "banned" or "doesn't exist"
	if err != nil {
		return User{}, fmt.Errorf("Error fetching user ID %d:\n %w", u_id, err)
	}

	if apiUser.ScreenName == "" {
		if apiUser.IsBanned {
			return User{}, ErrUserIsBanned
		}
		return User{}, ErrDoesntExist
	}
	return ParseSingleUser(apiUser)
}

View File

@ -2,17 +2,31 @@ package scraper_test
import (
"encoding/json"
"net/http"
"os"
"testing"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil {
panic(err)
}
var user_resp UserResponse
err = json.Unmarshal(data, &user_resp)
assert.NoError(err)
result, err := user_resp.ConvertToAPIUser()
assert.NoError(err)
assert.Equal(int64(44067298), result.ID)
assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount)
}
func TestParseSingleUser(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
@ -51,9 +65,7 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal(TweetID(1692611652397453790), user.PinnedTweetID)
}
/**
* Should correctly parse a banned user
*/
// Should correctly parse a banned user
func TestParseBannedUser(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/api_v2/user_suspended.json")
@ -76,9 +88,7 @@ func TestParseBannedUser(t *testing.T) {
assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
}
/**
* Should correctly parse a deleted user
*/
// Should correctly parse a deleted user
func TestParseDeletedUser(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/deleted_user.json")
@ -93,45 +103,3 @@ func TestParseDeletedUser(t *testing.T) {
assert.Error(err)
assert.ErrorIs(err, ErrDoesntExist)
}
/**
* Should extract a user handle from a shortened tweet URL
*/
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
assert := assert.New(t)
short_url := "https://t.co/rZVrNGJyDe"
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
httpmock.Activate()
defer httpmock.DeactivateAndReset()
httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) {
header := http.Header{}
header.Set("Location", expanded_url)
return &http.Response{StatusCode: 301, Header: header}, nil
})
// Check the httpmock interceptor is working correctly
require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request")
result, err := ParseHandleFromTweetUrl(short_url)
require.NoError(t, err)
assert.Equal(UserHandle("MarkSnyderJr1"), result)
}
// Should compute tiny profile image URLs correctly, and fix local paths if needed (e.g., "_normal" and no file extension)
func TestGetTinyURLs(t *testing.T) {
	assert := assert.New(t)

	u := User{
		ProfileImageUrl: "https://pbs.twimg.com/profile_images/1208124284/iwRReicO.jpg",
		Handle: "testUser",
	}
	// Expected value goes first, so that a failure reports "expected" and "actual" the right way round
	assert.Equal("https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal.jpg", u.GetTinyProfileImageUrl())
	assert.Equal("testUser_profile_iwRReicO_normal.jpg", u.GetTinyProfileImageLocalPath())

	// User with poorly formed profile image URL
	u.ProfileImageUrl = "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal"
	assert.Equal("https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal", u.GetTinyProfileImageUrl())
	assert.Equal("testUser_profile_iwRReicO_normal.jpg", u.GetTinyProfileImageLocalPath())
}

View File

@ -1326,124 +1326,6 @@ func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTro
return trove, err
}
// Get User
// --------
// GetUser looks up a user by handle via the "UserByScreenName" GraphQL query.
//
// Returns:
//   - whatever error `do_http` reports, unchanged
//   - ErrDoesntExist (unwrapped) if the response contains no user result at all
//   - any other conversion error, wrapped with the handle for context
//   - a placeholder from GetUnknownUserWithHandle, with `IsBanned` / `IsDeleted` set, if the
//     response has no screen name and marks the user as banned or nonexistent
//   - otherwise, the result of ParseSingleUser
//
// Panics if the request URL can't be parsed.
func (api API) GetUser(handle UserHandle) (User, error) {
	// Named `endpoint` rather than `url` so the "net/url" package isn't shadowed
	endpoint, err := url.Parse(GraphqlURL{
		BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName",
		Variables: GraphqlVariables{
			ScreenName: handle,
			Count: 20,
			IncludePromotedContent: false,
			WithSuperFollowsUserFields: true,
			WithDownvotePerspective: false,
			WithReactionsMetadata: false,
			WithReactionsPerspective: false,
			WithSuperFollowsTweetFields: true,
			WithBirdwatchNotes: false,
			WithVoice: true,
			WithV2Timeline: false,
		},
		Features: GraphqlFeatures{
			ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true,
			VerifiedPhoneLabelEnabled: false,
			ResponsiveWebGraphqlTimelineNavigationEnabled: true,
			UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true,
			TweetypieUnmentionOptimizationEnabled: true,
			ResponsiveWebUcGqlEnabled: true,
			VibeApiEnabled: true,
			ResponsiveWebEditTweetApiEnabled: true,
			GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true,
			StandardizedNudgesMisinfo: true,
			TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
			InteractiveTextEnabled: true,
			ResponsiveWebTextConversationsEnabled: false,
			ResponsiveWebEnhanceCardsEnabled: true,
		},
	}.String())
	if err != nil {
		panic(err)
	}

	var response UserResponse
	err = api.do_http(endpoint.String(), "", &response)
	if err != nil {
		return User{}, err
	}

	apiUser, err := response.ConvertToAPIUser()
	if errors.Is(err, ErrDoesntExist) {
		return User{}, err
	}
	// Check the error before touching `apiUser`: on failure its contents are meaningless, and
	// the banned/deleted early return below would otherwise swallow the error
	if err != nil {
		return User{}, fmt.Errorf("Error fetching user %q:\n %w", handle, err)
	}

	if apiUser.ScreenName == "" {
		if apiUser.IsBanned || apiUser.DoesntExist {
			// No profile data came back; return a placeholder that records why
			ret := GetUnknownUserWithHandle(handle)
			ret.IsBanned = apiUser.IsBanned
			ret.IsDeleted = apiUser.DoesntExist
			return ret, nil
		}
		// Fall back to the handle that was asked for
		apiUser.ScreenName = string(handle)
	}
	return ParseSingleUser(apiUser)
}
// GetUserByID opens a fresh guest session and delegates to API#GetUserByID on it,
// returning the parsed result.  If the guest session can't be created, that error is returned.
func GetUserByID(u_id UserID) (User, error) {
	// This endpoint works better if you're not logged in
	guest_api, session_err := NewGuestSession()
	if session_err != nil {
		return User{}, session_err
	}
	return guest_api.GetUserByID(u_id)
}
// GetUserByID looks up a user by numeric ID via the "UserByRestId" GraphQL query.
//
// Returns:
//   - whatever error `do_http` reports, unchanged
//   - ErrDoesntExist (unwrapped) if the response contains no user result at all
//   - any other conversion error, wrapped with the user ID for context
//   - ErrUserIsBanned if the response has no screen name and marks the user as banned
//   - ErrDoesntExist if the response has no screen name for any other reason
//   - otherwise, the result of ParseSingleUser
//
// Panics if `u_id` is 0, or if the request URL can't be parsed.
func (api API) GetUserByID(u_id UserID) (User, error) {
	if u_id == UserID(0) {
		panic("No Users with ID 0")
	}

	// Named `endpoint` rather than `url` so the "net/url" package isn't shadowed
	endpoint, err := url.Parse(GraphqlURL{
		BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId",
		Variables: GraphqlVariables{
			UserID: u_id,
		},
		Features: GraphqlFeatures{
			RWebTipjarConsumptionEnabled: true,
			ResponsiveWebGraphqlExcludeDirectiveEnabled: true,
			VerifiedPhoneLabelEnabled: false,
			ResponsiveWebGraphqlSkipUserProfileImageExtensionsEnabled: false,
			ResponsiveWebGraphqlTimelineNavigationEnabled: true,
			SubscriptionsFeatureCanGiftPremium: true,
			ResponsiveWebTwitterArticleNotesTabEnabled: true,
		},
	}.String())
	if err != nil {
		panic(err)
	}

	var response UserResponse
	err = api.do_http(endpoint.String(), "", &response)
	if err != nil {
		return User{}, err
	}

	apiUser, err := response.ConvertToAPIUser()
	if errors.Is(err, ErrDoesntExist) {
		return User{}, err
	}
	// Check the error before touching `apiUser`: otherwise an unrelated failure would hit the
	// empty-screen-name branch below and be misreported as "banned" or "doesn't exist"
	if err != nil {
		return User{}, fmt.Errorf("Error fetching user ID %d:\n %w", u_id, err)
	}

	if apiUser.ScreenName == "" {
		if apiUser.IsBanned {
			return User{}, ErrUserIsBanned
		}
		return User{}, ErrDoesntExist
	}
	return ParseSingleUser(apiUser)
}
// Paginated Search
// ----------------