diff --git a/doc/TODO.txt b/doc/TODO.txt index 4d425e8..21cd2de 100644 --- a/doc/TODO.txt +++ b/doc/TODO.txt @@ -80,6 +80,12 @@ TODO: mobile-requests - Check in `pkg/scraper/test_responses/mobile_requests` +TODO: search-bottom-cursor +- Entry type "TimelineReplaceEntries" that replaces the cursor in the timeline instead of the new timeline having a new one +- As first step, need helper function that returns the []Instruction element in a APIV2Response (not just the MainInstruction which is TimelineAddEntries) + - since different requests have different paths to the Instructions, e.g., Data.User.Result.Timeline.Timeline.Instructions vs Data.Home.HomeTimelineUrt.Instructions, etc + + Webserver --------- diff --git a/pkg/persistence/schema.sql b/pkg/persistence/schema.sql index 2e0592e..0cb9820 100644 --- a/pkg/persistence/schema.sql +++ b/pkg/persistence/schema.sql @@ -13,6 +13,7 @@ create table users (rowid integer primary key, is_private boolean default 0, is_verified boolean default 0, is_banned boolean default 0, + is_deleted boolean default 0, profile_image_url text, profile_image_local_path text, banner_image_url text, diff --git a/pkg/persistence/user_queries.go b/pkg/persistence/user_queries.go index 4eceec6..67f3fd1 100644 --- a/pkg/persistence/user_queries.go +++ b/pkg/persistence/user_queries.go @@ -33,11 +33,11 @@ func (p Profile) SaveUser(u *scraper.User) error { _, err := p.DB.NamedExec(` insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, - is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, - pinned_tweet_id, is_content_downloaded, is_id_fake) + is_verified, is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, + banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake) values (:id, :display_name, :handle, :bio, :following_count, :followers_count, :location, :website, :join_date, :is_private, - :is_verified, :is_banned, :profile_image_url, :profile_image_local_path, :banner_image_url, :banner_image_local_path, - :pinned_tweet_id, :is_content_downloaded, :is_id_fake) + :is_verified, :is_banned, :is_deleted, :profile_image_url, :profile_image_local_path, :banner_image_url, + :banner_image_local_path, :pinned_tweet_id, :is_content_downloaded, :is_id_fake) on conflict do update set handle=:handle, bio=:bio, @@ -49,6 +49,7 @@ func (p Profile) SaveUser(u *scraper.User) error { is_private=:is_private, is_verified=:is_verified, is_banned=:is_banned, + is_deleted=:is_deleted, profile_image_url=:profile_image_url, profile_image_local_path=:profile_image_local_path, banner_image_url=:banner_image_url, @@ -100,8 +101,8 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error var ret scraper.User err := db.Get(&ret, ` select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, - is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, - is_content_downloaded, is_followed + is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, + pinned_tweet_id, is_content_downloaded, is_followed from users where lower(handle) = lower(?) `, handle) @@ -126,8 +127,8 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) { err := db.Get(&ret, ` select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, - is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, - is_content_downloaded, is_followed + is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, + pinned_tweet_id, is_content_downloaded, is_followed from users where id = ? `, id) @@ -262,8 +263,8 @@ func (p Profile) SearchUsers(s string) []scraper.User { val := fmt.Sprintf("%%%s%%", s) err := p.DB.Select(&ret, ` select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, - is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, - is_content_downloaded, is_followed + is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, + pinned_tweet_id, is_content_downloaded, is_followed from users where handle like ? or display_name like ? diff --git a/pkg/persistence/versions.go b/pkg/persistence/versions.go index 3b9df86..23f05d6 100644 --- a/pkg/persistence/versions.go +++ b/pkg/persistence/versions.go @@ -52,6 +52,8 @@ var MIGRATIONS = []string{ (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, `alter table videos add column thumbnail_remote_url text not null default "missing"; alter table videos add column thumbnail_local_filename text not null default "missing"`, + + // 5 `alter table videos add column duration integer not null default 0; alter table videos add column view_count integer not null default 0`, `alter table users add column is_banned boolean default 0`, @@ -59,6 +61,8 @@ var MIGRATIONS = []string{ `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. ' || 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`, `alter table users add column is_followed boolean default 0`, + + // 10 `create table fake_user_sequence(latest_fake_id integer not null); insert into fake_user_sequence values(0x4000000000000000); alter table users add column is_id_fake boolean default 0;`, @@ -75,6 +79,8 @@ var MIGRATIONS = []string{ create index if not exists index_polls_tweet_id on polls (tweet_id); create index if not exists index_images_tweet_id on images (tweet_id); create index if not exists index_videos_tweet_id on videos (tweet_id);`, + + // 15 `alter table spaces add column created_by_id integer references users(id); alter table spaces add column state text not null default ""; alter table spaces add column title text not null default ""; @@ -116,12 +122,15 @@ var MIGRATIONS = []string{ foreign key(user_id) references users(id) foreign key(tweet_id) references tweets(id) );`, + + // 20 `create index if not exists index_tweets_posted_at on tweets (posted_at); create index if not exists index_retweets_retweeted_at on retweets (retweeted_at)`, `update spaces set ended_at = ended_at/1000 where ended_at > strftime("%s")*500; update spaces set updated_at = updated_at/1000 where updated_at > strftime("%s")*500; update spaces set started_at = started_at/1000 where started_at > strftime("%s")*500; update spaces set created_at = created_at/1000 where created_at > strftime("%s")*500;`, + `alter table users add column is_deleted boolean default 0`, } var ENGINE_DATABASE_VERSION = len(MIGRATIONS) diff --git a/pkg/scraper/api_graphql_utils.go b/pkg/scraper/api_graphql_utils.go index fb8e54e..7c072c7 100644 --- a/pkg/scraper/api_graphql_utils.go +++ b/pkg/scraper/api_graphql_utils.go @@ -6,26 +6,29 @@ import ( ) type GraphqlVariables struct { - UserID UserID `json:"userId,string,omitempty"` - FocalTweetID TweetID `json:"focalTweetId,string,omitempty"` - Cursor string `json:"cursor,omitempty"` - WithRuxInjections bool `json:"with_rux_injections"` - IncludePromotedContent bool `json:"includePromotedContent"` - Count int `json:"count,omitempty"` - WithCommunity bool `json:"withCommunity"` - WithQuickPromoteEligibilityTweetFields bool `json:"withQuickPromoteEligibilityTweetFields"` - WithSuperFollowsUserFields bool `json:"withSuperFollowsUserFields,omitempty"` - WithBirdwatchPivots bool `json:"withBirdwatchPivots"` - WithBirdwatchNotes bool `json:"withBirdwatchNotes,omitempty"` - WithDownvotePerspective bool `json:"withDownvotePerspective"` - WithReactionsMetadata bool `json:"withReactionsMetadata"` - WithReactionsPerspective bool `json:"withReactionsPerspective"` - WithSuperFollowsTweetFields bool `json:"withSuperFollowsTweetFields,omitempty"` - WithVoice bool `json:"withVoice"` - WithV2Timeline bool `json:"withV2Timeline"` - FSInteractiveText bool `json:"__fs_interactive_text,omitempty"` - FSResponsiveWebUCGqlEnabled bool `json:"__fs_responsive_web_uc_gql_enabled,omitempty"` - FSDontMentionMeViewApiEnabled bool `json:"__fs_dont_mention_me_view_api_enabled,omitempty"` + UserID UserID `json:"userId,string,omitempty"` + ScreenName UserHandle `json:"screen_name,omitempty"` + RawQuery string `json:"rawQuery,omitempty"` + Product string `json:"product,omitempty"` + FocalTweetID TweetID `json:"focalTweetId,string,omitempty"` + Cursor string `json:"cursor,omitempty"` + WithRuxInjections bool `json:"with_rux_injections"` + IncludePromotedContent bool `json:"includePromotedContent"` + Count int `json:"count,omitempty"` + WithCommunity bool `json:"withCommunity"` + WithQuickPromoteEligibilityTweetFields bool `json:"withQuickPromoteEligibilityTweetFields"` + WithSuperFollowsUserFields bool `json:"withSuperFollowsUserFields,omitempty"` + WithBirdwatchPivots bool `json:"withBirdwatchPivots"` + WithBirdwatchNotes bool `json:"withBirdwatchNotes,omitempty"` + WithDownvotePerspective bool `json:"withDownvotePerspective"` + WithReactionsMetadata bool `json:"withReactionsMetadata"` + WithReactionsPerspective bool `json:"withReactionsPerspective"` + WithSuperFollowsTweetFields bool `json:"withSuperFollowsTweetFields,omitempty"` + WithVoice bool `json:"withVoice"` + WithV2Timeline bool `json:"withV2Timeline"` + FSInteractiveText bool `json:"__fs_interactive_text,omitempty"` + FSResponsiveWebUCGqlEnabled bool `json:"__fs_responsive_web_uc_gql_enabled,omitempty"` + FSDontMentionMeViewApiEnabled bool `json:"__fs_dont_mention_me_view_api_enabled,omitempty"` // Spaces ID SpaceID `json:"id"` @@ -60,6 +63,11 @@ type GraphqlFeatures struct { ResponsiveWebTextConversationsEnabled bool `json:"responsive_web_text_conversations_enabled"` ResponsiveWebTwitterArticleTweetConsumptionEnabled bool `json:"responsive_web_twitter_article_tweet_consumption_enabled"` ResponsiveWebMediaDownloadVideoEnabled bool `json:"responsive_web_media_download_video_enabled"` + SubscriptionsVerificationInfoVerifiedSinceEnabled bool `json:"subscriptions_verification_info_verified_since_enabled"` + HiddenProfileLikesEnabled bool `json:"hidden_profile_likes_enabled"` + HiddenProfileSubscriptionsEnabled bool `json:"hidden_profile_subscriptions_enabled"` + HighlightsTweetsTabUIEnabled bool `json:"highlights_tweets_tab_ui_enabled"` + SubscriptionsVerificationInfoIsIdentityVerifiedEnabled bool `json:"subscriptions_verification_info_is_identity_verified_enabled"` //nolint:lll // I didn't choose this field name // Spaces Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"` diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index c8dce3e..ad8b658 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -440,81 +440,3 @@ func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_re } return nil } - -func (api API) GetUser(handle UserHandle) (APIUser, error) { - // TODO: break up this URL into params so it's readable - url, err := url.Parse("https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + - string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D") - if err != nil { - panic(err) - } - - var result UserResponse - for retries := 0; retries < 3; retries += 1 { - result = UserResponse{} // Clear any previous result - err = api.do_http(url.String(), "", &result) - if err != nil { - return APIUser{}, err - } - - if len(result.Errors) == 0 { - // Success; no retrying needed - break - } - - if result.Errors[0].Code != 50 || result.Errors[0].Name == "NotFoundError" { - // Retry ONLY if the error is code 50 (random authentication failure) - // Do NOT retry on real errors - break - } - } - - return result.ConvertToAPIUser(), err -} - -func (api *API) Search(query string, cursor string) (TweetResponse, error) { - url, err := url.Parse("https://twitter.com/i/api/2/search/adaptive.json") - if err != nil { - panic(err) - } - - queryParams := url.Query() - add_tweet_query_params(&queryParams) - queryParams.Add("count", "50") - queryParams.Add("spelling_corrections", "1") - queryParams.Add("query_source", "typed_query") - queryParams.Add("pc", "1") - queryParams.Add("q", query) - url.RawQuery = queryParams.Encode() - fmt.Println(url.RawQuery) - - var result TweetResponse - err = api.do_http(url.String(), cursor, &result) - return result, err -} - -func (api *API) GetMoreTweetsFromSearch(query string, response *TweetResponse, max_results int) error { - last_response := response - for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_results { - fresh_response, err := api.Search(query, last_response.GetCursor()) - if err != nil { - return err - } - if fresh_response.GetCursor() == last_response.GetCursor() || len(fresh_response.GlobalObjects.Tweets) == 0 { - // Empty response, cursor same as previous: end of feed has been reached - return END_OF_FEED - } - - last_response = &fresh_response - - // Copy the results over - for id, tweet := range last_response.GlobalObjects.Tweets { - response.GlobalObjects.Tweets[id] = tweet - } - for id, user := range last_response.GlobalObjects.Users { - response.GlobalObjects.Users[id] = user - } - fmt.Printf("Have %d tweets\n", len(response.GlobalObjects.Tweets)) - } - return nil -} diff --git a/pkg/scraper/api_types.go b/pkg/scraper/api_types.go index 35702f8..b0c9567 100644 --- a/pkg/scraper/api_types.go +++ b/pkg/scraper/api_types.go @@ -254,8 +254,15 @@ type APIUser struct { type UserResponse struct { Data struct { User struct { - ID int64 `json:"rest_id,string"` - Legacy APIUser `json:"legacy"` + Result struct { + ID int64 `json:"rest_id,string"` + Legacy APIUser `json:"legacy"` + IsBlueVerified bool `json:"is_blue_verified"` + UnavailableMessage struct { + Text string `json:"text"` + } `json:"unavailable_message"` + Reason string `json:"reason"` + } `json:"result"` } `json:"user"` } `json:"data"` Errors []struct { @@ -266,8 +273,9 @@ type UserResponse struct { } func (u UserResponse) ConvertToAPIUser() APIUser { - ret := u.Data.User.Legacy - ret.ID = u.Data.User.ID + ret := u.Data.User.Result.Legacy + ret.ID = u.Data.User.Result.ID + ret.Verified = u.Data.User.Result.IsBlueVerified // Banned users for _, api_error := range u.Errors { @@ -280,6 +288,18 @@ func (u UserResponse) ConvertToAPIUser() APIUser { } } + // Banned users, new version + if u.Data.User.Result.UnavailableMessage.Text != "" { + if u.Data.User.Result.Reason == "Suspended" { + ret.IsBanned = true + } + } + + // Deleted users + if ret.ID == 0 && ret.ScreenName == "" && u.Data.User.Result.UnavailableMessage.Text == "" { + ret.DoesntExist = true + } + return ret } diff --git a/pkg/scraper/api_types_test.go b/pkg/scraper/api_types_test.go index 04d433f..731bfad 100644 --- a/pkg/scraper/api_types_test.go +++ b/pkg/scraper/api_types_test.go @@ -71,7 +71,7 @@ func TestUserProfileToAPIUser(t *testing.T) { result := user_resp.ConvertToAPIUser() assert.Equal(int64(44067298), result.ID) - assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) + assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount) } func TestGetCursor(t *testing.T) { diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index b9ab1d8..cb4b898 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -531,6 +531,13 @@ type APIV2Response struct { ThreadedConversationWithInjectionsV2 struct { Instructions []APIV2Instruction `json:"instructions"` } `json:"threaded_conversation_with_injections_v2"` + SearchByRawQuery struct { + SearchTimeline struct { + Timeline struct { + Instructions []APIV2Instruction `json:"instructions"` + } `json:"timeline"` + } `json:"search_timeline"` + } `json:"search_by_raw_query"` } `json:"data"` } @@ -559,6 +566,12 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction { return &instructions[i] } } + instructions = api_response.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions + for i := range instructions { + if instructions[i].Type == "TimelineAddEntries" { + return &instructions[i] + } + } panic("No 'TimelineAddEntries' found") } @@ -1005,3 +1018,121 @@ func (api API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, erro func GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) { return the_api.GetHomeTimeline(cursor, is_for_you) } + +func (api API) GetUser(handle UserHandle) (APIUser, error) { + url, err := url.Parse(GraphqlURL{ + BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName", + Variables: GraphqlVariables{ + ScreenName: handle, + Count: 20, + IncludePromotedContent: false, + WithSuperFollowsUserFields: true, + WithDownvotePerspective: false, + WithReactionsMetadata: false, + WithReactionsPerspective: false, + WithSuperFollowsTweetFields: true, + WithBirdwatchNotes: false, + WithVoice: true, + WithV2Timeline: false, + }, + Features: GraphqlFeatures{ + ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true, + VerifiedPhoneLabelEnabled: false, + ResponsiveWebGraphqlTimelineNavigationEnabled: true, + UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true, + TweetypieUnmentionOptimizationEnabled: true, + ResponsiveWebUcGqlEnabled: true, + VibeApiEnabled: true, + ResponsiveWebEditTweetApiEnabled: true, + GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true, + StandardizedNudgesMisinfo: true, + TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false, + InteractiveTextEnabled: true, + ResponsiveWebTextConversationsEnabled: false, + ResponsiveWebEnhanceCardsEnabled: true, + }, + }.String()) + if err != nil { + panic(err) + } + + var response UserResponse + err = api.do_http(url.String(), "", &response) + if err != nil { + panic(err) + } + + return response.ConvertToAPIUser(), nil +} + +func (api *API) Search(query string, cursor string) (APIV2Response, error) { + url, err := url.Parse(GraphqlURL{ + BaseUrl: "https://twitter.com/i/api/graphql/NA567V_8AFwu0cZEkAAKcw/SearchTimeline", + Variables: GraphqlVariables{ + RawQuery: query, + Count: 50, + Product: "Top", + Cursor: cursor, + IncludePromotedContent: false, + WithSuperFollowsUserFields: true, + WithDownvotePerspective: false, + WithReactionsMetadata: false, + WithReactionsPerspective: false, + WithSuperFollowsTweetFields: true, + WithBirdwatchNotes: false, + WithVoice: true, + WithV2Timeline: false, + }, + Features: GraphqlFeatures{ + ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true, + VerifiedPhoneLabelEnabled: false, + ResponsiveWebGraphqlTimelineNavigationEnabled: true, + UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true, + TweetypieUnmentionOptimizationEnabled: true, + ResponsiveWebUcGqlEnabled: true, + VibeApiEnabled: true, + ResponsiveWebEditTweetApiEnabled: true, + GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true, + StandardizedNudgesMisinfo: true, + TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false, + InteractiveTextEnabled: true, + ResponsiveWebTextConversationsEnabled: false, + ResponsiveWebEnhanceCardsEnabled: true, + }, + }.String()) + if err != nil { + panic(err) + } + + var result APIV2Response + err = api.do_http(url.String(), cursor, &result) + return result, err +} + +func (api *API) GetMoreTweetsFromSearch(query string, response *APIV2Response, max_results int) error { + last_response := response + for last_response.GetCursorBottom() != "" && len(response.GetMainInstruction().Entries) < max_results { + fresh_response, err := api.Search(query, last_response.GetCursorBottom()) + if err != nil { + return err + } + if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() || len(fresh_response.GetMainInstruction().Entries) == 0 { + // Empty response, cursor same as previous: end of feed has been reached + return END_OF_FEED + } + + last_response = &fresh_response + + // Copy the results over + // Copy over the entries + response.GetMainInstruction().Entries = append( + response.GetMainInstruction().Entries, + last_response.GetMainInstruction().Entries...) + + fmt.Printf("Have %d tweets\n", len(response.GetMainInstruction().Entries)) + } + fmt.Println() + fmt.Printf("Cursor bottom: %q\n", last_response.GetCursorBottom()) + fmt.Printf("Entries count: %d\n", len(response.GetMainInstruction().Entries)) + return nil +} diff --git a/pkg/scraper/search.go b/pkg/scraper/search.go index 597e378..38f6560 100644 --- a/pkg/scraper/search.go +++ b/pkg/scraper/search.go @@ -18,13 +18,13 @@ func TimestampToDateString(timestamp int) string { * - videos */ func Search(query string, min_results int) (trove TweetTrove, err error) { - tweet_response, err := the_api.Search(query, "") + api_response, err := the_api.Search(query, "") if err != nil { return } - if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" { - err = the_api.GetMoreTweetsFromSearch(query, &tweet_response, min_results) + if len(api_response.GetMainInstruction().Entries) < min_results && api_response.GetCursorBottom() != "" { + err = the_api.GetMoreTweetsFromSearch(query, &api_response, min_results) if errors.Is(err, END_OF_FEED) { println("End of feed!") } else if err != nil { @@ -32,7 +32,7 @@ func Search(query string, min_results int) (trove TweetTrove, err error) { } } - trove, err = tweet_response.ToTweetTrove() + trove, err = api_response.ToTweetTrove() if err != nil { err = fmt.Errorf("Error parsing the tweet trove for search query %q:\n %w", query, err) return diff --git a/pkg/scraper/test_responses/api_v2/user_suspended.json b/pkg/scraper/test_responses/api_v2/user_suspended.json new file mode 100644 index 0000000..1fd1c02 --- /dev/null +++ b/pkg/scraper/test_responses/api_v2/user_suspended.json @@ -0,0 +1 @@ +{"data":{"user":{"result":{"__typename":"UserUnavailable","unavailable_message":{"rtl":false,"text":"Twitter suspends accounts that violate the Twitter Rules. Learn more","entities":[{"fromIndex":58,"toIndex":68,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/twitter-rules","urlType":"ExternalUrl"}}]},"reason":"Suspended"}}}} diff --git a/pkg/scraper/test_responses/deleted_user.json b/pkg/scraper/test_responses/deleted_user.json index d50c55e..394541d 100644 --- a/pkg/scraper/test_responses/deleted_user.json +++ b/pkg/scraper/test_responses/deleted_user.json @@ -1 +1 @@ -{"errors":[{"message":"User 'GregCunningham0' not found","locations":[{"line":126,"column":3}],"path":["user"],"extensions":{"name":"NotFoundError","source":"Server","code":50,"kind":"NonFatal","tracing":{"trace_id":"2c6f690015d9f18e"}},"code":50,"kind":"NonFatal","name":"NotFoundError","source":"Server","tracing":{"trace_id":"2c6f690015d9f18e"}}],"data":{}} +{"data":{}} diff --git a/pkg/scraper/test_responses/michael_malice_user_profile.json b/pkg/scraper/test_responses/michael_malice_user_profile.json index 57aa80f..51e322d 100644 --- a/pkg/scraper/test_responses/michael_malice_user_profile.json +++ b/pkg/scraper/test_responses/michael_malice_user_profile.json @@ -1 +1 @@ -{"data":{"user":{"id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"legacy":{"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":2782,"followers_count":208589,"friends_count":941,"has_custom_timelines":false,"is_translator":false,"listed_count":1058,"location":"Brooklyn","media_count":8124,"name":"Michael Malice","normal_followers_count":208589,"pinned_tweet_ids_str":["1403835414373339136"],"profile_banner_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":60.59,"rgb":{"blue":0,"green":0,"red":0}},{"percentage":18.77,"rgb":{"blue":64,"green":60,"red":156}},{"percentage":3.62,"rgb":{"blue":31,"green":29,"red":77}},{"percentage":3.22,"rgb":{"blue":215,"green":199,"red":138}},{"percentage":2.83,"rgb":{"blue":85,"green":79,"red":215}}]}}}},"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1615134676","profile_image_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":74.71,"rgb":{"blue":31,"green":32,"red":36}},{"percentage":14.09,"rgb":{"blue":100,"green":129,"red":186}},{"percentage":4.84,"rgb":{"blue":96,"green":108,"red":118}},{"percentage":2.18,"rgb":{"blue":71,"green":61,"red":54}},{"percentage":2.11,"rgb":{"blue":37,"green":56,"red":99}}]}}}},"profile_image_url_https":"https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg","profile_interstitial_type":"","protected":false,"screen_name":"michaelmalice","statuses_count":126641,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":true,"withheld_in_countries":[]},"legacy_extended_profile":{},"is_profile_translatable":false}}} +{"data":{"user":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":false,"can_media_tag":false,"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author: Dear Reader, The New Right, The Anarchist Handbook & The White Pill \nHost: \"YOUR WELCOME\" \nSubject: Ego & Hubris by Harvey Pekar\nHe/Him ⚑","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":4784,"followers_count":649484,"friends_count":1035,"has_custom_timelines":true,"is_translator":false,"listed_count":2196,"location":"Austin","media_count":13275,"name":"Michael Malice","normal_followers_count":649484,"pinned_tweet_ids_str":["1692611652397453790"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1664774013","profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg","profile_interstitial_type":"","screen_name":"michaelmalice","statuses_count":160843,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":false,"want_retweets":false,"withheld_in_countries":[]},"smart_blocked_by":false,"smart_blocking":false,"legacy_extended_profile":{},"is_profile_translatable":false,"has_hidden_subscriptions_on_profile":false,"verification_info":{"reason":{"description":{"text":"This account is verified. Learn more","entities":[{"from_index":26,"to_index":36,"ref":{"url":"https://help.twitter.com/managing-your-account/about-twitter-verified-accounts","url_type":"ExternalUrl"}}]},"verified_since_msec":"1469093319523"}},"highlights_info":{"can_highlight_tweets":true,"highlighted_tweets":"0"},"business_account":{},"creator_subscriptions_count":0}}}} diff --git a/pkg/scraper/test_responses/suspended_user.json b/pkg/scraper/test_responses/suspended_user.json deleted file mode 100644 index 457750c..0000000 --- a/pkg/scraper/test_responses/suspended_user.json +++ /dev/null @@ -1 +0,0 @@ -{"errors":[{"message":"Authorization: User has been suspended. (63)","locations":[{"line":17,"column":3}],"path":["user","legacy"],"extensions":{"name":"AuthorizationError","source":"Client","code":63,"kind":"Permissions","tracing":{"trace_id":"c9f5c3bc1afaab46"}},"code":63,"kind":"Permissions","name":"AuthorizationError","source":"Client","tracing":{"trace_id":"c9f5c3bc1afaab46"}}],"data":{"user":{"id":"VXNlcjoxOTM5MTg1NTA=","rest_id":"193918550","affiliates_highlighted_label":{},"legacy_extended_profile":{},"is_profile_translatable":false}}} diff --git a/pkg/scraper/tweet.go b/pkg/scraper/tweet.go index c28894e..3638d81 100644 --- a/pkg/scraper/tweet.go +++ b/pkg/scraper/tweet.go @@ -248,18 +248,23 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { * returns: the single Tweet */ func GetTweet(id TweetID) (Tweet, error) { - tweet_response, err := the_api.GetTweet(id, "") + resp, err := the_api.GetTweetDetail(id, "") if err != nil { - return Tweet{}, fmt.Errorf("Error in API call:\n %w", err) + return Tweet{}, fmt.Errorf("Error getting tweet detail: %d\n %w", id, err) + } + trove, err := resp.ToTweetTrove() + if err != nil { + return Tweet{}, err } - single_tweet, ok := tweet_response.GlobalObjects.Tweets[fmt.Sprint(id)] - + // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at" + tweet, ok := trove.Tweets[id] if !ok { - return Tweet{}, fmt.Errorf("Didn't get the tweet!") + panic("Trove didn't contain its own tweet!") } - - return ParseSingleTweet(single_tweet) + tweet.LastScrapedAt = Timestamp{time.Now()} + tweet.IsConversationScraped = true + return tweet, nil } /** diff --git a/pkg/scraper/user.go b/pkg/scraper/user.go index f7a028c..e8fad20 100644 --- a/pkg/scraper/user.go +++ b/pkg/scraper/user.go @@ -175,8 +175,14 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) { // Calls API#GetUser and returns the parsed result func GetUser(handle UserHandle) (User, error) { - apiUser, err := the_api.GetUser(handle) + apiUser, err := NewGuestSession().GetUser(handle) if apiUser.ScreenName == "" { + if apiUser.IsBanned || apiUser.DoesntExist { + ret := GetUnknownUserWithHandle(handle) + ret.IsBanned = apiUser.IsBanned + ret.IsDeleted = apiUser.DoesntExist + return ret, nil + } apiUser.ScreenName = string(handle) } if err != nil { diff --git a/pkg/scraper/user_test.go b/pkg/scraper/user_test.go index 34fb0cd..6fe3742 100644 --- a/pkg/scraper/user_test.go +++ b/pkg/scraper/user_test.go @@ -31,22 +31,22 @@ func TestParseSingleUser(t *testing.T) { assert.Equal(UserID(44067298), user.ID) assert.Equal("Michael Malice", user.DisplayName) assert.Equal(UserHandle("michaelmalice"), user.Handle) - assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+ - "Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio) - assert.Equal(941, user.FollowingCount) - assert.Equal(208589, user.FollowersCount) - assert.Equal("Brooklyn", user.Location) + assert.Equal("Author: Dear Reader, The New Right, The Anarchist Handbook & The White Pill \n"+ + "Host: \"YOUR WELCOME\" \nSubject: Ego & Hubris by Harvey Pekar\nHe/Him ⚑", user.Bio) + assert.Equal(1035, user.FollowingCount) + assert.Equal(649484, user.FollowersCount) + assert.Equal("Austin", user.Location) assert.Equal("https://amzn.to/3oInafv", user.Website) assert.Equal(int64(1243920952), user.JoinDate.Unix()) assert.False(user.IsPrivate) assert.True(user.IsVerified) assert.False(user.IsBanned) - assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl) - assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl()) - assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1615134676", user.BannerImageUrl) - assert.Equal("michaelmalice_profile_Lbwdb_C9.jpg", user.ProfileImageLocalPath) - assert.Equal("michaelmalice_banner_1615134676.jpg", user.BannerImageLocalPath) - assert.Equal(TweetID(1403835414373339136), user.PinnedTweetID) + assert.Equal("https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8.jpg", user.ProfileImageUrl) + assert.Equal("https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg", user.GetTinyProfileImageUrl()) + assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1664774013", user.BannerImageUrl) + assert.Equal("michaelmalice_profile__VVX4GI8.jpg", user.ProfileImageLocalPath) + assert.Equal("michaelmalice_banner_1664774013.jpg", user.BannerImageLocalPath) + assert.Equal(TweetID(1692611652397453790), user.PinnedTweetID) } /** @@ -54,7 +54,7 @@ func TestParseSingleUser(t *testing.T) { */ func TestParseBannedUser(t *testing.T) { assert := assert.New(t) - data, err := os.ReadFile("test_responses/suspended_user.json") + data, err := os.ReadFile("test_responses/api_v2/user_suspended.json") if err != nil { panic(err) } @@ -66,7 +66,6 @@ func TestParseBannedUser(t *testing.T) { user, err := ParseSingleUser(apiUser) require.NoError(t, err) - assert.Equal(UserID(193918550), user.ID) assert.True(user.IsBanned) // Test generation of profile images for banned user