From f7d383adf37bf4fb343649dfee1b328960c81af2 Mon Sep 17 00:00:00 2001 From: Alessio Date: Wed, 7 Jun 2023 17:18:26 -0300 Subject: [PATCH] REFACTOR: move API types, requests and tests to their own 'api_types_spaces' files --- scraper/api_request_utils.go | 13 +--- scraper/api_types_spaces.go | 105 +++++++++++++++++++++++++++++++ scraper/api_types_spaces_test.go | 63 +++++++++++++++++++ scraper/api_types_v2.go | 88 -------------------------- scraper/api_types_v2_test.go | 51 --------------- 5 files changed, 169 insertions(+), 151 deletions(-) create mode 100644 scraper/api_types_spaces.go create mode 100644 scraper/api_types_spaces_test.go diff --git a/scraper/api_request_utils.go b/scraper/api_request_utils.go index feb7c53..6dbcf8a 100644 --- a/scraper/api_request_utils.go +++ b/scraper/api_request_utils.go @@ -207,6 +207,7 @@ func (api *API) update_csrf_token() { panic("No CSRF Token Found") } + func (api *API) do_http_POST(url string, body string, result interface{}) error { req, err := http.NewRequest("POST", url, strings.NewReader(body)) if err != nil { @@ -387,18 +388,6 @@ func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, mi return nil } -func (api API) GetSpace(id SpaceID) (SpaceResponse, error) { - // TODO: break up this URL into params so it's readable - url, err := url.Parse("https://twitter.com/i/api/graphql/Ha9BKBF0uAz9d4-lz0jnYA/AudioSpaceById?variables=%7B%22id%22%3A%22" + string(id) + "%22%2C%22isMetatagsQuery%22%3Afalse%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withReplays%22%3Atrue%7D&features=%7B%22spaces_2022_h2_clipping%22%3Atrue%2C%22spaces_2022_h2_spaces_communities%22%3Atrue%2C%22responsive_web_twitter_blue_verified_badge_is_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_uc_gql_enabled%22%3Atrue%2C%22vibe_api_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22interactive_text_enabled%22%3Atrue%2C%22responsive_web_text_conversations_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Atrue%7D") //nolint:lll // It's a URL, come on - if err != nil { - panic(err) - } - - var result SpaceResponse - err = api.do_http(url.String(), "", &result) - return result, err -} - func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id)) if err != nil { diff --git a/scraper/api_types_spaces.go b/scraper/api_types_spaces.go new file mode 100644 index 0000000..2dedcf6 --- /dev/null +++ b/scraper/api_types_spaces.go @@ -0,0 +1,105 @@ +package scraper + +import ( + "net/url" +) + +type SpaceResponse struct { + Data struct { + AudioSpace struct { + Metadata struct { + RestId string `json:"rest_id"` + State string + Title string + MediaKey string `json:"media_key"` + CreatedAt int64 `json:"created_at"` + StartedAt int64 `json:"started_at"` + EndedAt int64 `json:"ended_at,string"` + UpdatedAt int64 `json:"updated_at"` + DisallowJoin bool `json:"disallow_join"` + NarrowCastSpaceType int64 `json:"narrow_cast_space_type"` + IsEmployeeOnly bool `json:"is_employee_only"` + IsLocked bool `json:"is_locked"` + IsSpaceAvailableForReplay bool `json:"is_space_available_for_replay"` + IsSpaceAvailableForClipping bool `json:"is_space_available_for_clipping"` + ConversationControls int64 `json:"conversation_controls"` + TotalReplayWatched int64 `json:"total_replay_watched"` + TotalLiveListeners int64 `json:"total_live_listeners"` + CreatorResults struct { + Result struct { + ID int64 `json:"rest_id,string"` + Legacy APIUser `json:"legacy"` + } `json:"result"` + } `json:"creator_results"` + } + Participants struct { + Total int + Admins []struct { + Start int + User struct { + RestId int64 `json:"rest_id,string"` + } + } + Speakers []struct { + User struct { + RestId int64 `json:"rest_id,string"` + } + } + } + } + } +} + +func (r SpaceResponse) ToTweetTrove() TweetTrove { + data := r.Data.AudioSpace + + ret := NewTweetTrove() + space := Space{} + space.ID = SpaceID(data.Metadata.RestId) + if space.ID == "" { + // The response is empty. Abort processing + return ret + } + + space.Title = data.Metadata.Title + space.State = data.Metadata.State + space.CreatedById = UserID(data.Metadata.CreatorResults.Result.ID) + space.CreatedAt = TimestampFromUnix(data.Metadata.CreatedAt) + space.StartedAt = TimestampFromUnix(data.Metadata.StartedAt) + space.EndedAt = TimestampFromUnix(data.Metadata.EndedAt) + space.UpdatedAt = TimestampFromUnix(data.Metadata.UpdatedAt) + space.IsAvailableForReplay = data.Metadata.IsSpaceAvailableForReplay + space.ReplayWatchCount = data.Metadata.TotalReplayWatched + space.LiveListenersCount = data.Metadata.TotalLiveListeners + space.IsDetailsFetched = true + + for _, admin := range data.Participants.Admins { + space.ParticipantIds = append(space.ParticipantIds, UserID(admin.User.RestId)) + } + for _, speaker := range data.Participants.Speakers { + space.ParticipantIds = append(space.ParticipantIds, UserID(speaker.User.RestId)) + } + + ret.Spaces[space.ID] = space + + creator, err := ParseSingleUser(data.Metadata.CreatorResults.Result.Legacy) + if err != nil { + panic(err) + } + creator.ID = space.CreatedById + ret.Users[creator.ID] = creator + + return ret +} + +func (api API) GetSpace(id SpaceID) (SpaceResponse, error) { + // TODO: break up this URL into params so it's readable + url, err := url.Parse("https://twitter.com/i/api/graphql/Ha9BKBF0uAz9d4-lz0jnYA/AudioSpaceById?variables=%7B%22id%22%3A%22" + string(id) + "%22%2C%22isMetatagsQuery%22%3Afalse%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withReplays%22%3Atrue%7D&features=%7B%22spaces_2022_h2_clipping%22%3Atrue%2C%22spaces_2022_h2_spaces_communities%22%3Atrue%2C%22responsive_web_twitter_blue_verified_badge_is_enabled%22%3Atrue%2C%22verified_phone_label_enabled%22%3Afalse%2C%22tweetypie_unmention_optimization_enabled%22%3Atrue%2C%22responsive_web_uc_gql_enabled%22%3Atrue%2C%22vibe_api_enabled%22%3Atrue%2C%22responsive_web_edit_tweet_api_enabled%22%3Atrue%2C%22graphql_is_translatable_rweb_tweet_is_translatable_enabled%22%3Atrue%2C%22standardized_nudges_misinfo%22%3Atrue%2C%22tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled%22%3Afalse%2C%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%2C%22interactive_text_enabled%22%3Atrue%2C%22responsive_web_text_conversations_enabled%22%3Afalse%2C%22responsive_web_enhance_cards_enabled%22%3Atrue%7D") //nolint:lll // It's a URL, come on + if err != nil { + panic(err) + } + + var result SpaceResponse + err = api.do_http(url.String(), "", &result) + return result, err +} diff --git a/scraper/api_types_spaces_test.go b/scraper/api_types_spaces_test.go new file mode 100644 index 0000000..dfd2fa0 --- /dev/null +++ b/scraper/api_types_spaces_test.go @@ -0,0 +1,63 @@ +package scraper_test + +import ( + "encoding/json" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" +) + +func TestParseSpaceResponse(t *testing.T) { + assert := assert.New(t) + require := require.New(t) + data, err := os.ReadFile("test_responses/tweet_content/space_object.json") + if err != nil { + panic(err) + } + + var response SpaceResponse + err = json.Unmarshal(data, &response) + assert.NoError(err) + + trove := response.ToTweetTrove() + require.Len(trove.Spaces, 1) + space := trove.Spaces["1BdxYypQzBgxX"] + assert.Equal(space.Title, "dreary weather 🌧️☔🌬️") + assert.Equal(space.CreatedById, UserID(1356335022815539201)) + assert.Equal(int64(1665884387263), space.CreatedAt.Time.Unix()) + assert.Equal(int64(1665884388222), space.StartedAt.Time.Unix()) + assert.Equal(int64(1665887491804), space.EndedAt.Time.Unix()) + assert.Equal(int64(1665887492705), space.UpdatedAt.Time.Unix()) + assert.False(space.IsAvailableForReplay) + assert.Equal(int64(4), space.ReplayWatchCount) + assert.Equal(int64(1), space.LiveListenersCount) + + assert.True(space.IsDetailsFetched) + + assert.Len(space.ParticipantIds, 2) + assert.Equal(UserID(1356335022815539201), space.ParticipantIds[0]) + assert.Equal(UserID(1523838615377350656), space.ParticipantIds[1]) + + require.Len(trove.Users, 1) + user := trove.Users[1356335022815539201] + assert.Equal(847, user.FollowersCount) +} + +func TestParseEmptySpaceResponse(t *testing.T) { + require := require.New(t) + data, err := os.ReadFile("test_responses/tweet_content/space_object_empty.json") + if err != nil { + panic(err) + } + + var response SpaceResponse + err = json.Unmarshal(data, &response) + require.NoError(err) + + trove := response.ToTweetTrove() + require.Len(trove.Spaces, 0) +} diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index d9618bb..5e621bf 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -491,91 +491,3 @@ func (api *API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Resp } return nil } - -type SpaceResponse struct { - Data struct { - AudioSpace struct { - Metadata struct { - RestId string `json:"rest_id"` - State string - Title string - MediaKey string `json:"media_key"` - CreatedAt int64 `json:"created_at"` - StartedAt int64 `json:"started_at"` - EndedAt int64 `json:"ended_at,string"` - UpdatedAt int64 `json:"updated_at"` - DisallowJoin bool `json:"disallow_join"` - NarrowCastSpaceType int64 `json:"narrow_cast_space_type"` - IsEmployeeOnly bool `json:"is_employee_only"` - IsLocked bool `json:"is_locked"` - IsSpaceAvailableForReplay bool `json:"is_space_available_for_replay"` - IsSpaceAvailableForClipping bool `json:"is_space_available_for_clipping"` - ConversationControls int64 `json:"conversation_controls"` - TotalReplayWatched int64 `json:"total_replay_watched"` - TotalLiveListeners int64 `json:"total_live_listeners"` - CreatorResults struct { - Result struct { - ID int64 `json:"rest_id,string"` - Legacy APIUser `json:"legacy"` - } `json:"result"` - } `json:"creator_results"` - } - Participants struct { - Total int - Admins []struct { - Start int - User struct { - RestId int64 `json:"rest_id,string"` - } - } - Speakers []struct { - User struct { - RestId int64 `json:"rest_id,string"` - } - } - } - } - } -} - -func (r SpaceResponse) ToTweetTrove() TweetTrove { - data := r.Data.AudioSpace - - ret := NewTweetTrove() - space := Space{} - space.ID = SpaceID(data.Metadata.RestId) - if space.ID == "" { - // The response is empty. Abort processing - return ret - } - - space.Title = data.Metadata.Title - space.State = data.Metadata.State - space.CreatedById = UserID(data.Metadata.CreatorResults.Result.ID) - space.CreatedAt = TimestampFromUnix(data.Metadata.CreatedAt) - space.StartedAt = TimestampFromUnix(data.Metadata.StartedAt) - space.EndedAt = TimestampFromUnix(data.Metadata.EndedAt) - space.UpdatedAt = TimestampFromUnix(data.Metadata.UpdatedAt) - space.IsAvailableForReplay = data.Metadata.IsSpaceAvailableForReplay - space.ReplayWatchCount = data.Metadata.TotalReplayWatched - space.LiveListenersCount = data.Metadata.TotalLiveListeners - space.IsDetailsFetched = true - - for _, admin := range data.Participants.Admins { - space.ParticipantIds = append(space.ParticipantIds, UserID(admin.User.RestId)) - } - for _, speaker := range data.Participants.Speakers { - space.ParticipantIds = append(space.ParticipantIds, UserID(speaker.User.RestId)) - } - - ret.Spaces[space.ID] = space - - creator, err := ParseSingleUser(data.Metadata.CreatorResults.Result.Legacy) - if err != nil { - panic(err) - } - creator.ID = space.CreatedById - ret.Users[creator.ID] = creator - - return ret -} diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index c00832e..4ba313a 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -471,57 +471,6 @@ func TestAPIV2ParseTweetWithSpace(t *testing.T) { assert.False(s.IsDetailsFetched) } -func TestParseSpaceResponse(t *testing.T) { - assert := assert.New(t) - require := require.New(t) - data, err := os.ReadFile("test_responses/tweet_content/space_object.json") - if err != nil { - panic(err) - } - - var response SpaceResponse - err = json.Unmarshal(data, &response) - assert.NoError(err) - - trove := response.ToTweetTrove() - require.Len(trove.Spaces, 1) - space := trove.Spaces["1BdxYypQzBgxX"] - assert.Equal(space.Title, "dreary weather 🌧️☔🌬️") - assert.Equal(space.CreatedById, UserID(1356335022815539201)) - assert.Equal(int64(1665884387263), space.CreatedAt.Time.Unix()) - assert.Equal(int64(1665884388222), space.StartedAt.Time.Unix()) - assert.Equal(int64(1665887491804), space.EndedAt.Time.Unix()) - assert.Equal(int64(1665887492705), space.UpdatedAt.Time.Unix()) - assert.False(space.IsAvailableForReplay) - assert.Equal(int64(4), space.ReplayWatchCount) - assert.Equal(int64(1), space.LiveListenersCount) - - assert.True(space.IsDetailsFetched) - - assert.Len(space.ParticipantIds, 2) - assert.Equal(UserID(1356335022815539201), space.ParticipantIds[0]) - assert.Equal(UserID(1523838615377350656), space.ParticipantIds[1]) - - require.Len(trove.Users, 1) - user := trove.Users[1356335022815539201] - assert.Equal(847, user.FollowersCount) -} - -func TestParseEmptySpaceResponse(t *testing.T) { - require := require.New(t) - data, err := os.ReadFile("test_responses/tweet_content/space_object_empty.json") - if err != nil { - panic(err) - } - - var response SpaceResponse - err = json.Unmarshal(data, &response) - require.NoError(err) - - trove := response.ToTweetTrove() - require.Len(trove.Spaces, 0) -} - func TestParseAPIV2UserFeed(t *testing.T) { data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json") if err != nil {