Fix fetching users and search

- Add is_deleted field on Users
- Fix fetching of tombstoned users including deleted users
- Fix "verified" bluechecks not being scraped anymore
- Fix search to use new graphql endpoint (old one got taken down)
This commit is contained in:
Alessio 2023-08-22 20:07:32 -03:00
parent e09c97a85e
commit eaa01a2360
17 changed files with 249 additions and 141 deletions

View File

@ -80,6 +80,12 @@ TODO: mobile-requests
- Check in `pkg/scraper/test_responses/mobile_requests` - Check in `pkg/scraper/test_responses/mobile_requests`
TODO: search-bottom-cursor
- Entry type "TimelineReplaceEntries" that replaces the cursor in the timeline instead of the new timeline having a new one
- As a first step, need a helper function that returns the []Instruction element in an APIV2Response (not just the MainInstruction, which is TimelineAddEntries)
- since different requests have different paths to the Instructions, e.g., Data.User.Result.Timeline.Timeline.Instructions vs Data.Home.HomeTimelineUrt.Instructions, etc
Webserver Webserver
--------- ---------

View File

@ -13,6 +13,7 @@ create table users (rowid integer primary key,
is_private boolean default 0, is_private boolean default 0,
is_verified boolean default 0, is_verified boolean default 0,
is_banned boolean default 0, is_banned boolean default 0,
is_deleted boolean default 0,
profile_image_url text, profile_image_url text,
profile_image_local_path text, profile_image_local_path text,
banner_image_url text, banner_image_url text,

View File

@ -33,11 +33,11 @@ func (p Profile) SaveUser(u *scraper.User) error {
_, err := p.DB.NamedExec(` _, err := p.DB.NamedExec(`
insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private,
is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, is_verified, is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url,
pinned_tweet_id, is_content_downloaded, is_id_fake) banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake)
values (:id, :display_name, :handle, :bio, :following_count, :followers_count, :location, :website, :join_date, :is_private, values (:id, :display_name, :handle, :bio, :following_count, :followers_count, :location, :website, :join_date, :is_private,
:is_verified, :is_banned, :profile_image_url, :profile_image_local_path, :banner_image_url, :banner_image_local_path, :is_verified, :is_banned, :is_deleted, :profile_image_url, :profile_image_local_path, :banner_image_url,
:pinned_tweet_id, :is_content_downloaded, :is_id_fake) :banner_image_local_path, :pinned_tweet_id, :is_content_downloaded, :is_id_fake)
on conflict do update on conflict do update
set handle=:handle, set handle=:handle,
bio=:bio, bio=:bio,
@ -49,6 +49,7 @@ func (p Profile) SaveUser(u *scraper.User) error {
is_private=:is_private, is_private=:is_private,
is_verified=:is_verified, is_verified=:is_verified,
is_banned=:is_banned, is_banned=:is_banned,
is_deleted=:is_deleted,
profile_image_url=:profile_image_url, profile_image_url=:profile_image_url,
profile_image_local_path=:profile_image_local_path, profile_image_local_path=:profile_image_local_path,
banner_image_url=:banner_image_url, banner_image_url=:banner_image_url,
@ -100,8 +101,8 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error
var ret scraper.User var ret scraper.User
err := db.Get(&ret, ` err := db.Get(&ret, `
select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified,
is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path,
is_content_downloaded, is_followed pinned_tweet_id, is_content_downloaded, is_followed
from users from users
where lower(handle) = lower(?) where lower(handle) = lower(?)
`, handle) `, handle)
@ -126,8 +127,8 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
err := db.Get(&ret, ` err := db.Get(&ret, `
select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified,
is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path,
is_content_downloaded, is_followed pinned_tweet_id, is_content_downloaded, is_followed
from users from users
where id = ? where id = ?
`, id) `, id)
@ -262,8 +263,8 @@ func (p Profile) SearchUsers(s string) []scraper.User {
val := fmt.Sprintf("%%%s%%", s) val := fmt.Sprintf("%%%s%%", s)
err := p.DB.Select(&ret, ` err := p.DB.Select(&ret, `
select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified,
is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_banned, is_deleted, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path,
is_content_downloaded, is_followed pinned_tweet_id, is_content_downloaded, is_followed
from users from users
where handle like ? where handle like ?
or display_name like ? or display_name like ?

View File

@ -52,6 +52,8 @@ var MIGRATIONS = []string{
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
`alter table videos add column thumbnail_remote_url text not null default "missing"; `alter table videos add column thumbnail_remote_url text not null default "missing";
alter table videos add column thumbnail_local_filename text not null default "missing"`, alter table videos add column thumbnail_local_filename text not null default "missing"`,
// 5
`alter table videos add column duration integer not null default 0; `alter table videos add column duration integer not null default 0;
alter table videos add column view_count integer not null default 0`, alter table videos add column view_count integer not null default 0`,
`alter table users add column is_banned boolean default 0`, `alter table users add column is_banned boolean default 0`,
@ -59,6 +61,8 @@ var MIGRATIONS = []string{
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. ' `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|| 'This content might not be appropriate for people under 18 years old. To view this media, youll need to log in to Twitter')`, || 'This content might not be appropriate for people under 18 years old. To view this media, youll need to log in to Twitter')`,
`alter table users add column is_followed boolean default 0`, `alter table users add column is_followed boolean default 0`,
// 10
`create table fake_user_sequence(latest_fake_id integer not null); `create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000); insert into fake_user_sequence values(0x4000000000000000);
alter table users add column is_id_fake boolean default 0;`, alter table users add column is_id_fake boolean default 0;`,
@ -75,6 +79,8 @@ var MIGRATIONS = []string{
create index if not exists index_polls_tweet_id on polls (tweet_id); create index if not exists index_polls_tweet_id on polls (tweet_id);
create index if not exists index_images_tweet_id on images (tweet_id); create index if not exists index_images_tweet_id on images (tweet_id);
create index if not exists index_videos_tweet_id on videos (tweet_id);`, create index if not exists index_videos_tweet_id on videos (tweet_id);`,
// 15
`alter table spaces add column created_by_id integer references users(id); `alter table spaces add column created_by_id integer references users(id);
alter table spaces add column state text not null default ""; alter table spaces add column state text not null default "";
alter table spaces add column title text not null default ""; alter table spaces add column title text not null default "";
@ -116,12 +122,15 @@ var MIGRATIONS = []string{
foreign key(user_id) references users(id) foreign key(user_id) references users(id)
foreign key(tweet_id) references tweets(id) foreign key(tweet_id) references tweets(id)
);`, );`,
// 20
`create index if not exists index_tweets_posted_at on tweets (posted_at); `create index if not exists index_tweets_posted_at on tweets (posted_at);
create index if not exists index_retweets_retweeted_at on retweets (retweeted_at)`, create index if not exists index_retweets_retweeted_at on retweets (retweeted_at)`,
`update spaces set ended_at = ended_at/1000 where ended_at > strftime("%s")*500; `update spaces set ended_at = ended_at/1000 where ended_at > strftime("%s")*500;
update spaces set updated_at = updated_at/1000 where updated_at > strftime("%s")*500; update spaces set updated_at = updated_at/1000 where updated_at > strftime("%s")*500;
update spaces set started_at = started_at/1000 where started_at > strftime("%s")*500; update spaces set started_at = started_at/1000 where started_at > strftime("%s")*500;
update spaces set created_at = created_at/1000 where created_at > strftime("%s")*500;`, update spaces set created_at = created_at/1000 where created_at > strftime("%s")*500;`,
`alter table users add column is_deleted boolean default 0`,
} }
var ENGINE_DATABASE_VERSION = len(MIGRATIONS) var ENGINE_DATABASE_VERSION = len(MIGRATIONS)

View File

@ -6,26 +6,29 @@ import (
) )
type GraphqlVariables struct { type GraphqlVariables struct {
UserID UserID `json:"userId,string,omitempty"` UserID UserID `json:"userId,string,omitempty"`
FocalTweetID TweetID `json:"focalTweetId,string,omitempty"` ScreenName UserHandle `json:"screen_name,omitempty"`
Cursor string `json:"cursor,omitempty"` RawQuery string `json:"rawQuery,omitempty"`
WithRuxInjections bool `json:"with_rux_injections"` Product string `json:"product,omitempty"`
IncludePromotedContent bool `json:"includePromotedContent"` FocalTweetID TweetID `json:"focalTweetId,string,omitempty"`
Count int `json:"count,omitempty"` Cursor string `json:"cursor,omitempty"`
WithCommunity bool `json:"withCommunity"` WithRuxInjections bool `json:"with_rux_injections"`
WithQuickPromoteEligibilityTweetFields bool `json:"withQuickPromoteEligibilityTweetFields"` IncludePromotedContent bool `json:"includePromotedContent"`
WithSuperFollowsUserFields bool `json:"withSuperFollowsUserFields,omitempty"` Count int `json:"count,omitempty"`
WithBirdwatchPivots bool `json:"withBirdwatchPivots"` WithCommunity bool `json:"withCommunity"`
WithBirdwatchNotes bool `json:"withBirdwatchNotes,omitempty"` WithQuickPromoteEligibilityTweetFields bool `json:"withQuickPromoteEligibilityTweetFields"`
WithDownvotePerspective bool `json:"withDownvotePerspective"` WithSuperFollowsUserFields bool `json:"withSuperFollowsUserFields,omitempty"`
WithReactionsMetadata bool `json:"withReactionsMetadata"` WithBirdwatchPivots bool `json:"withBirdwatchPivots"`
WithReactionsPerspective bool `json:"withReactionsPerspective"` WithBirdwatchNotes bool `json:"withBirdwatchNotes,omitempty"`
WithSuperFollowsTweetFields bool `json:"withSuperFollowsTweetFields,omitempty"` WithDownvotePerspective bool `json:"withDownvotePerspective"`
WithVoice bool `json:"withVoice"` WithReactionsMetadata bool `json:"withReactionsMetadata"`
WithV2Timeline bool `json:"withV2Timeline"` WithReactionsPerspective bool `json:"withReactionsPerspective"`
FSInteractiveText bool `json:"__fs_interactive_text,omitempty"` WithSuperFollowsTweetFields bool `json:"withSuperFollowsTweetFields,omitempty"`
FSResponsiveWebUCGqlEnabled bool `json:"__fs_responsive_web_uc_gql_enabled,omitempty"` WithVoice bool `json:"withVoice"`
FSDontMentionMeViewApiEnabled bool `json:"__fs_dont_mention_me_view_api_enabled,omitempty"` WithV2Timeline bool `json:"withV2Timeline"`
FSInteractiveText bool `json:"__fs_interactive_text,omitempty"`
FSResponsiveWebUCGqlEnabled bool `json:"__fs_responsive_web_uc_gql_enabled,omitempty"`
FSDontMentionMeViewApiEnabled bool `json:"__fs_dont_mention_me_view_api_enabled,omitempty"`
// Spaces // Spaces
ID SpaceID `json:"id"` ID SpaceID `json:"id"`
@ -60,6 +63,11 @@ type GraphqlFeatures struct {
ResponsiveWebTextConversationsEnabled bool `json:"responsive_web_text_conversations_enabled"` ResponsiveWebTextConversationsEnabled bool `json:"responsive_web_text_conversations_enabled"`
ResponsiveWebTwitterArticleTweetConsumptionEnabled bool `json:"responsive_web_twitter_article_tweet_consumption_enabled"` ResponsiveWebTwitterArticleTweetConsumptionEnabled bool `json:"responsive_web_twitter_article_tweet_consumption_enabled"`
ResponsiveWebMediaDownloadVideoEnabled bool `json:"responsive_web_media_download_video_enabled"` ResponsiveWebMediaDownloadVideoEnabled bool `json:"responsive_web_media_download_video_enabled"`
SubscriptionsVerificationInfoVerifiedSinceEnabled bool `json:"subscriptions_verification_info_verified_since_enabled"`
HiddenProfileLikesEnabled bool `json:"hidden_profile_likes_enabled"`
HiddenProfileSubscriptionsEnabled bool `json:"hidden_profile_subscriptions_enabled"`
HighlightsTweetsTabUIEnabled bool `json:"highlights_tweets_tab_ui_enabled"`
SubscriptionsVerificationInfoIsIdentityVerifiedEnabled bool `json:"subscriptions_verification_info_is_identity_verified_enabled"` //nolint:lll // I didn't choose this field name
// Spaces // Spaces
Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"` Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"`

View File

@ -440,81 +440,3 @@ func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_re
} }
return nil return nil
} }
// GetUser requests the profile for `handle` from the UserByScreenName endpoint and converts
// the response into an APIUser.
//
// The request is attempted at most 3 times.  Another attempt is made only when the first
// reported error has code 50 and is not a "NotFoundError"; any other outcome ends the loop.
// A failed HTTP call returns immediately with an empty APIUser and the error.
func (api API) GetUser(handle UserHandle) (APIUser, error) {
	// TODO: break up this URL into params so it's readable
	endpoint, err := url.Parse("https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" +
		string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D")
	if err != nil {
		panic(err)
	}

	var result UserResponse
	for attempt := 0; attempt < 3; attempt++ {
		// Start from a clean value so nothing from a previous attempt leaks through
		result = UserResponse{}
		if err = api.do_http(endpoint.String(), "", &result); err != nil {
			return APIUser{}, err
		}

		// Only code 50 (random authentication failure) is worth another attempt;
		// a genuine "NotFoundError" and every other real error are final.
		retryable := len(result.Errors) != 0 &&
			result.Errors[0].Code == 50 &&
			result.Errors[0].Name != "NotFoundError"
		if !retryable {
			break
		}
	}
	return result.ConvertToAPIUser(), err
}
// Search runs `query` against the adaptive.json search endpoint and returns the decoded
// TweetResponse.  `cursor` is handed to do_http unchanged.
func (api *API) Search(query string, cursor string) (TweetResponse, error) {
	search_url, err := url.Parse("https://twitter.com/i/api/2/search/adaptive.json")
	if err != nil {
		panic(err)
	}

	params := search_url.Query()
	add_tweet_query_params(&params)
	// Search-specific parameters, added on top of the common tweet params
	for _, kv := range [][2]string{
		{"count", "50"},
		{"spelling_corrections", "1"},
		{"query_source", "typed_query"},
		{"pc", "1"},
		{"q", query},
	} {
		params.Add(kv[0], kv[1])
	}
	search_url.RawQuery = params.Encode()
	fmt.Println(search_url.RawQuery)

	var result TweetResponse
	err = api.do_http(search_url.String(), cursor, &result)
	return result, err
}
// GetMoreTweetsFromSearch keeps requesting further pages for `query` and merges their tweets
// and users into `response`, until `response` holds at least `max_results` tweets or the
// most recent page has no cursor.
//
// Returns END_OF_FEED when a page is empty or repeats the previous cursor, the error from
// Search if a request fails, and nil otherwise.
func (api *API) GetMoreTweetsFromSearch(query string, response *TweetResponse, max_results int) error {
	latest := response
	for {
		if latest.GetCursor() == "" || len(response.GlobalObjects.Tweets) >= max_results {
			break
		}

		page, err := api.Search(query, latest.GetCursor())
		if err != nil {
			return err
		}

		if page.GetCursor() == latest.GetCursor() || len(page.GlobalObjects.Tweets) == 0 {
			// Empty response, cursor same as previous: end of feed has been reached
			return END_OF_FEED
		}
		latest = &page

		// Merge the new page into the accumulated response
		for tweet_id, t := range latest.GlobalObjects.Tweets {
			response.GlobalObjects.Tweets[tweet_id] = t
		}
		for user_id, u := range latest.GlobalObjects.Users {
			response.GlobalObjects.Users[user_id] = u
		}
		fmt.Printf("Have %d tweets\n", len(response.GlobalObjects.Tweets))
	}
	return nil
}

View File

@ -254,8 +254,15 @@ type APIUser struct {
type UserResponse struct { type UserResponse struct {
Data struct { Data struct {
User struct { User struct {
ID int64 `json:"rest_id,string"` Result struct {
Legacy APIUser `json:"legacy"` ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
IsBlueVerified bool `json:"is_blue_verified"`
UnavailableMessage struct {
Text string `json:"text"`
} `json:"unavailable_message"`
Reason string `json:"reason"`
} `json:"result"`
} `json:"user"` } `json:"user"`
} `json:"data"` } `json:"data"`
Errors []struct { Errors []struct {
@ -266,8 +273,9 @@ type UserResponse struct {
} }
func (u UserResponse) ConvertToAPIUser() APIUser { func (u UserResponse) ConvertToAPIUser() APIUser {
ret := u.Data.User.Legacy ret := u.Data.User.Result.Legacy
ret.ID = u.Data.User.ID ret.ID = u.Data.User.Result.ID
ret.Verified = u.Data.User.Result.IsBlueVerified
// Banned users // Banned users
for _, api_error := range u.Errors { for _, api_error := range u.Errors {
@ -280,6 +288,18 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
} }
} }
// Banned users, new version
if u.Data.User.Result.UnavailableMessage.Text != "" {
if u.Data.User.Result.Reason == "Suspended" {
ret.IsBanned = true
}
}
// Deleted users
if ret.ID == 0 && ret.ScreenName == "" && u.Data.User.Result.UnavailableMessage.Text == "" {
ret.DoesntExist = true
}
return ret return ret
} }

View File

@ -71,7 +71,7 @@ func TestUserProfileToAPIUser(t *testing.T) {
result := user_resp.ConvertToAPIUser() result := user_resp.ConvertToAPIUser()
assert.Equal(int64(44067298), result.ID) assert.Equal(int64(44067298), result.ID)
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount)
} }
func TestGetCursor(t *testing.T) { func TestGetCursor(t *testing.T) {

View File

@ -531,6 +531,13 @@ type APIV2Response struct {
ThreadedConversationWithInjectionsV2 struct { ThreadedConversationWithInjectionsV2 struct {
Instructions []APIV2Instruction `json:"instructions"` Instructions []APIV2Instruction `json:"instructions"`
} `json:"threaded_conversation_with_injections_v2"` } `json:"threaded_conversation_with_injections_v2"`
SearchByRawQuery struct {
SearchTimeline struct {
Timeline struct {
Instructions []APIV2Instruction `json:"instructions"`
} `json:"timeline"`
} `json:"search_timeline"`
} `json:"search_by_raw_query"`
} `json:"data"` } `json:"data"`
} }
@ -559,6 +566,12 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
return &instructions[i] return &instructions[i]
} }
} }
instructions = api_response.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions
for i := range instructions {
if instructions[i].Type == "TimelineAddEntries" {
return &instructions[i]
}
}
panic("No 'TimelineAddEntries' found") panic("No 'TimelineAddEntries' found")
} }
@ -1005,3 +1018,121 @@ func (api API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, erro
func GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) { func GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) {
return the_api.GetHomeTimeline(cursor, is_for_you) return the_api.GetHomeTimeline(cursor, is_for_you)
} }
// GetUser requests the profile for `handle` from the UserByScreenName GraphQL endpoint and
// converts the response into an APIUser via UserResponse.ConvertToAPIUser.
//
// returns: the converted APIUser, or an empty APIUser and a wrapped error if the HTTP request
// fails.  Request failures are ordinary runtime conditions, so they are reported to the caller
// (which already checks the returned error) instead of crashing the program.
func (api API) GetUser(handle UserHandle) (APIUser, error) {
	url, err := url.Parse(GraphqlURL{
		BaseUrl: "https://api.twitter.com/graphql/SAMkL5y_N9pmahSw8yy6gw/UserByScreenName",
		Variables: GraphqlVariables{
			ScreenName:                  handle,
			Count:                       20,
			IncludePromotedContent:      false,
			WithSuperFollowsUserFields:  true,
			WithDownvotePerspective:     false,
			WithReactionsMetadata:       false,
			WithReactionsPerspective:    false,
			WithSuperFollowsTweetFields: true,
			WithBirdwatchNotes:          false,
			WithVoice:                   true,
			WithV2Timeline:              false,
		},
		Features: GraphqlFeatures{
			ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled:                 true,
			VerifiedPhoneLabelEnabled:                                      false,
			ResponsiveWebGraphqlTimelineNavigationEnabled:                  true,
			UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled:  true,
			TweetypieUnmentionOptimizationEnabled:                          true,
			ResponsiveWebUcGqlEnabled:                                      true,
			VibeApiEnabled:                                                 true,
			ResponsiveWebEditTweetApiEnabled:                               true,
			GraphqlIsTranslatableRWebTweetIsTranslatableEnabled:            true,
			StandardizedNudgesMisinfo:                                      true,
			TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
			InteractiveTextEnabled:                                         true,
			ResponsiveWebTextConversationsEnabled:                          false,
			ResponsiveWebEnhanceCardsEnabled:                               true,
		},
	}.String())
	if err != nil {
		// NOTE(review): presumably GraphqlURL.String() always yields a parseable URL, which
		// would make this a programmer error -- confirm against its implementation.
		panic(err)
	}

	var response UserResponse
	err = api.do_http(url.String(), "", &response)
	if err != nil {
		// Hand the failure back to the caller rather than panicking
		return APIUser{}, fmt.Errorf("Error getting user %q:\n  %w", handle, err)
	}
	return response.ConvertToAPIUser(), nil
}
// Search requests one page of "Top" results for `query` from the SearchTimeline GraphQL
// endpoint and returns the decoded APIV2Response.
//
// `cursor` is sent both as a GraphQL variable (left out when empty) and as the cursor
// argument to do_http.
func (api *API) Search(query string, cursor string) (APIV2Response, error) {
	variables := GraphqlVariables{
		RawQuery:                    query,
		Count:                       50,
		Product:                     "Top",
		Cursor:                      cursor,
		IncludePromotedContent:      false,
		WithSuperFollowsUserFields:  true,
		WithDownvotePerspective:     false,
		WithReactionsMetadata:       false,
		WithReactionsPerspective:    false,
		WithSuperFollowsTweetFields: true,
		WithBirdwatchNotes:          false,
		WithVoice:                   true,
		WithV2Timeline:              false,
	}
	features := GraphqlFeatures{
		ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled:                 true,
		VerifiedPhoneLabelEnabled:                                      false,
		ResponsiveWebGraphqlTimelineNavigationEnabled:                  true,
		UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled:  true,
		TweetypieUnmentionOptimizationEnabled:                          true,
		ResponsiveWebUcGqlEnabled:                                      true,
		VibeApiEnabled:                                                 true,
		ResponsiveWebEditTweetApiEnabled:                               true,
		GraphqlIsTranslatableRWebTweetIsTranslatableEnabled:            true,
		StandardizedNudgesMisinfo:                                      true,
		TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
		InteractiveTextEnabled:                                         true,
		ResponsiveWebTextConversationsEnabled:                          false,
		ResponsiveWebEnhanceCardsEnabled:                               true,
	}
	endpoint := GraphqlURL{
		BaseUrl:   "https://twitter.com/i/api/graphql/NA567V_8AFwu0cZEkAAKcw/SearchTimeline",
		Variables: variables,
		Features:  features,
	}

	search_url, err := url.Parse(endpoint.String())
	if err != nil {
		panic(err)
	}

	var result APIV2Response
	err = api.do_http(search_url.String(), cursor, &result)
	return result, err
}
// GetMoreTweetsFromSearch keeps requesting further pages for `query` and appends their
// main-instruction entries to those of `response`, until `response` holds at least
// `max_results` entries or the most recent page has no bottom cursor.
//
// Returns END_OF_FEED when a page has no entries or repeats the previous bottom cursor, the
// error from Search if a request fails, and nil otherwise.  Progress is printed to stdout.
func (api *API) GetMoreTweetsFromSearch(query string, response *APIV2Response, max_results int) error {
	latest := response
	for {
		if latest.GetCursorBottom() == "" || len(response.GetMainInstruction().Entries) >= max_results {
			break
		}

		page, err := api.Search(query, latest.GetCursorBottom())
		if err != nil {
			return err
		}

		if page.GetCursorBottom() == latest.GetCursorBottom() || len(page.GetMainInstruction().Entries) == 0 {
			// Empty response, cursor same as previous: end of feed has been reached
			return END_OF_FEED
		}
		latest = &page

		// Append the new page's entries onto the accumulated response
		main_instruction := response.GetMainInstruction()
		main_instruction.Entries = append(main_instruction.Entries, latest.GetMainInstruction().Entries...)
		fmt.Printf("Have %d tweets\n", len(response.GetMainInstruction().Entries))
	}

	fmt.Println()
	fmt.Printf("Cursor bottom: %q\n", latest.GetCursorBottom())
	fmt.Printf("Entries count: %d\n", len(response.GetMainInstruction().Entries))
	return nil
}

View File

@ -18,13 +18,13 @@ func TimestampToDateString(timestamp int) string {
* - videos * - videos
*/ */
func Search(query string, min_results int) (trove TweetTrove, err error) { func Search(query string, min_results int) (trove TweetTrove, err error) {
tweet_response, err := the_api.Search(query, "") api_response, err := the_api.Search(query, "")
if err != nil { if err != nil {
return return
} }
if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" { if len(api_response.GetMainInstruction().Entries) < min_results && api_response.GetCursorBottom() != "" {
err = the_api.GetMoreTweetsFromSearch(query, &tweet_response, min_results) err = the_api.GetMoreTweetsFromSearch(query, &api_response, min_results)
if errors.Is(err, END_OF_FEED) { if errors.Is(err, END_OF_FEED) {
println("End of feed!") println("End of feed!")
} else if err != nil { } else if err != nil {
@ -32,7 +32,7 @@ func Search(query string, min_results int) (trove TweetTrove, err error) {
} }
} }
trove, err = tweet_response.ToTweetTrove() trove, err = api_response.ToTweetTrove()
if err != nil { if err != nil {
err = fmt.Errorf("Error parsing the tweet trove for search query %q:\n %w", query, err) err = fmt.Errorf("Error parsing the tweet trove for search query %q:\n %w", query, err)
return return

View File

@ -0,0 +1 @@
{"data":{"user":{"result":{"__typename":"UserUnavailable","unavailable_message":{"rtl":false,"text":"Twitter suspends accounts that violate the Twitter Rules. Learn more","entities":[{"fromIndex":58,"toIndex":68,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/twitter-rules","urlType":"ExternalUrl"}}]},"reason":"Suspended"}}}}

View File

@ -1 +1 @@
{"errors":[{"message":"User 'GregCunningham0' not found","locations":[{"line":126,"column":3}],"path":["user"],"extensions":{"name":"NotFoundError","source":"Server","code":50,"kind":"NonFatal","tracing":{"trace_id":"2c6f690015d9f18e"}},"code":50,"kind":"NonFatal","name":"NotFoundError","source":"Server","tracing":{"trace_id":"2c6f690015d9f18e"}}],"data":{}} {"data":{}}

View File

@ -1 +1 @@
{"data":{"user":{"id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"legacy":{"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":2782,"followers_count":208589,"friends_count":941,"has_custom_timelines":false,"is_translator":false,"listed_count":1058,"location":"Brooklyn","media_count":8124,"name":"Michael Malice","normal_followers_count":208589,"pinned_tweet_ids_str":["1403835414373339136"],"profile_banner_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":60.59,"rgb":{"blue":0,"green":0,"red":0}},{"percentage":18.77,"rgb":{"blue":64,"green":60,"red":156}},{"percentage":3.62,"rgb":{"blue":31,"green":29,"red":77}},{"percentage":3.22,"rgb":{"blue":215,"green":199,"red":138}},{"percentage":2.83,"rgb":{"blue":85,"green":79,"red":215}}]}}}},"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1615134676","profile_image_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":74.71,"rgb":{"blue":31,"green":32,"red":36}},{"percentage":14.09,"rgb":{"blue":100,"green":129,"red":186}},{"percentage":4.84,"rgb":{"blue":96,"green":108,"red":118}},{"percentage":2.18,"rgb":{"blue":71,"green":61,"red":54}},{"percentage":2.11,"rgb":{"blue":37,"green":56,"red":99}}]}}}},"profile_image_url_https":"https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg","profile_interstitial_type":"","protected":false,"screen_name":"michaelmalice","statuses_count":126641,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":true,"withheld_in_countries":[]},"legacy_
extended_profile":{},"is_profile_translatable":false}}} {"data":{"user":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_graduated_access":true,"is_blue_verified":true,"profile_image_shape":"Circle","legacy":{"can_dm":false,"can_media_tag":false,"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author: Dear Reader, The New Right, The Anarchist Handbook & The White Pill \nHost: \"YOUR WELCOME\" \nSubject: Ego & Hubris by Harvey Pekar\nHe/Him ⚑","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":4784,"followers_count":649484,"friends_count":1035,"has_custom_timelines":true,"is_translator":false,"listed_count":2196,"location":"Austin","media_count":13275,"name":"Michael Malice","normal_followers_count":649484,"pinned_tweet_ids_str":["1692611652397453790"],"possibly_sensitive":false,"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1664774013","profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg","profile_interstitial_type":"","screen_name":"michaelmalice","statuses_count":160843,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":false,"want_retweets":false,"withheld_in_countries":[]},"smart_blocked_by":false,"smart_blocking":false,"legacy_extended_profile":{},"is_profile_translatable":false,"has_hidden_subscriptions_on_profile":false,"verification_info":{"reason":{"description":{"text":"This account is verified. 
Learn more","entities":[{"from_index":26,"to_index":36,"ref":{"url":"https://help.twitter.com/managing-your-account/about-twitter-verified-accounts","url_type":"ExternalUrl"}}]},"verified_since_msec":"1469093319523"}},"highlights_info":{"can_highlight_tweets":true,"highlighted_tweets":"0"},"business_account":{},"creator_subscriptions_count":0}}}}

View File

@ -1 +0,0 @@
{"errors":[{"message":"Authorization: User has been suspended. (63)","locations":[{"line":17,"column":3}],"path":["user","legacy"],"extensions":{"name":"AuthorizationError","source":"Client","code":63,"kind":"Permissions","tracing":{"trace_id":"c9f5c3bc1afaab46"}},"code":63,"kind":"Permissions","name":"AuthorizationError","source":"Client","tracing":{"trace_id":"c9f5c3bc1afaab46"}}],"data":{"user":{"id":"VXNlcjoxOTM5MTg1NTA=","rest_id":"193918550","affiliates_highlighted_label":{},"legacy_extended_profile":{},"is_profile_translatable":false}}}

View File

@ -248,18 +248,23 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
* returns: the single Tweet * returns: the single Tweet
*/ */
func GetTweet(id TweetID) (Tweet, error) { func GetTweet(id TweetID) (Tweet, error) {
tweet_response, err := the_api.GetTweet(id, "") resp, err := the_api.GetTweetDetail(id, "")
if err != nil { if err != nil {
return Tweet{}, fmt.Errorf("Error in API call:\n %w", err) return Tweet{}, fmt.Errorf("Error getting tweet detail: %d\n %w", id, err)
}
trove, err := resp.ToTweetTrove()
if err != nil {
return Tweet{}, err
} }
single_tweet, ok := tweet_response.GlobalObjects.Tweets[fmt.Sprint(id)] // Find the main tweet and update its "is_conversation_downloaded" and "last_scraped_at"
tweet, ok := trove.Tweets[id]
if !ok { if !ok {
return Tweet{}, fmt.Errorf("Didn't get the tweet!") panic("Trove didn't contain its own tweet!")
} }
tweet.LastScrapedAt = Timestamp{time.Now()}
return ParseSingleTweet(single_tweet) tweet.IsConversationScraped = true
return tweet, nil
} }
/** /**

View File

@ -175,8 +175,14 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
// Calls API#GetUser and returns the parsed result // Calls API#GetUser and returns the parsed result
func GetUser(handle UserHandle) (User, error) { func GetUser(handle UserHandle) (User, error) {
apiUser, err := the_api.GetUser(handle) apiUser, err := NewGuestSession().GetUser(handle)
if apiUser.ScreenName == "" { if apiUser.ScreenName == "" {
if apiUser.IsBanned || apiUser.DoesntExist {
ret := GetUnknownUserWithHandle(handle)
ret.IsBanned = apiUser.IsBanned
ret.IsDeleted = apiUser.DoesntExist
return ret, nil
}
apiUser.ScreenName = string(handle) apiUser.ScreenName = string(handle)
} }
if err != nil { if err != nil {

View File

@ -31,22 +31,22 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal(UserID(44067298), user.ID) assert.Equal(UserID(44067298), user.ID)
assert.Equal("Michael Malice", user.DisplayName) assert.Equal("Michael Malice", user.DisplayName)
assert.Equal(UserHandle("michaelmalice"), user.Handle) assert.Equal(UserHandle("michaelmalice"), user.Handle)
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+ assert.Equal("Author: Dear Reader, The New Right, The Anarchist Handbook & The White Pill \n"+
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio) "Host: \"YOUR WELCOME\" \nSubject: Ego & Hubris by Harvey Pekar\nHe/Him ⚑", user.Bio)
assert.Equal(941, user.FollowingCount) assert.Equal(1035, user.FollowingCount)
assert.Equal(208589, user.FollowersCount) assert.Equal(649484, user.FollowersCount)
assert.Equal("Brooklyn", user.Location) assert.Equal("Austin", user.Location)
assert.Equal("https://amzn.to/3oInafv", user.Website) assert.Equal("https://amzn.to/3oInafv", user.Website)
assert.Equal(int64(1243920952), user.JoinDate.Unix()) assert.Equal(int64(1243920952), user.JoinDate.Unix())
assert.False(user.IsPrivate) assert.False(user.IsPrivate)
assert.True(user.IsVerified) assert.True(user.IsVerified)
assert.False(user.IsBanned) assert.False(user.IsBanned)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl) assert.Equal("https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8.jpg", user.ProfileImageUrl)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl()) assert.Equal("https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg", user.GetTinyProfileImageUrl())
assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1615134676", user.BannerImageUrl) assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1664774013", user.BannerImageUrl)
assert.Equal("michaelmalice_profile_Lbwdb_C9.jpg", user.ProfileImageLocalPath) assert.Equal("michaelmalice_profile__VVX4GI8.jpg", user.ProfileImageLocalPath)
assert.Equal("michaelmalice_banner_1615134676.jpg", user.BannerImageLocalPath) assert.Equal("michaelmalice_banner_1664774013.jpg", user.BannerImageLocalPath)
assert.Equal(TweetID(1403835414373339136), user.PinnedTweetID) assert.Equal(TweetID(1692611652397453790), user.PinnedTweetID)
} }
/** /**
@ -54,7 +54,7 @@ func TestParseSingleUser(t *testing.T) {
*/ */
func TestParseBannedUser(t *testing.T) { func TestParseBannedUser(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/suspended_user.json") data, err := os.ReadFile("test_responses/api_v2/user_suspended.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -66,7 +66,6 @@ func TestParseBannedUser(t *testing.T) {
user, err := ParseSingleUser(apiUser) user, err := ParseSingleUser(apiUser)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(UserID(193918550), user.ID)
assert.True(user.IsBanned) assert.True(user.IsBanned)
// Test generation of profile images for banned user // Test generation of profile images for banned user