diff --git a/pkg/scraper/api_obsolete_requests.go b/pkg/scraper/api_obsolete_requests.go index 78790cf..12a4174 100644 --- a/pkg/scraper/api_obsolete_requests.go +++ b/pkg/scraper/api_obsolete_requests.go @@ -13,7 +13,7 @@ import ( const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/" const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/" -func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) { +func (api API) GetFeedFor(user_id UserID, cursor string) (APIv1Response, error) { url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id)) if err != nil { panic(err) @@ -22,7 +22,7 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) add_tweet_query_params(&queryParams) url.RawQuery = queryParams.Encode() - var result TweetResponse + var result APIv1Response err = api.do_http(url.String(), cursor, &result) return result, err @@ -33,10 +33,10 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) * * args: * - user_id: the user's UserID - * - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to + * - response: an "out" parameter; the APIv1Response that tweets, RTs and users will be appended to * - min_tweets: the desired minimum amount of tweets to get */ -func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error { +func (api API) GetMoreTweetsFromFeed(user_id UserID, response *APIv1Response, min_tweets int) error { last_response := response for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets { fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor()) @@ -121,7 +121,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) { } } - // This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list) + // This has to be called BEFORE ToTweetTrove, because it modifies the APIv1Response (adds tombstone tweets to its tweets list) tombstoned_users := tweet_response.HandleTombstones() trove, err = tweet_response.ToTweetTrove() @@ -150,7 +150,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) { return } -func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { +func (api *API) GetTweet(id TweetID, cursor string) (APIv1Response, error) { url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id)) if err != nil { panic(err) @@ -162,13 +162,13 @@ func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { add_tweet_query_params(&queryParams) url.RawQuery = queryParams.Encode() - var result TweetResponse + var result APIv1Response err = api.do_http(url.String(), cursor, &result) return result, err } // Resend the request to get more replies if necessary -func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error { +func (api *API) GetMoreReplies(tweet_id TweetID, response *APIv1Response, max_replies int) error { last_response := response for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies { fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor()) diff --git a/pkg/scraper/api_types.go b/pkg/scraper/api_types.go index cae2874..4991221 100644 --- a/pkg/scraper/api_types.go +++ b/pkg/scraper/api_types.go @@ -735,7 +735,7 @@ func (u UserResponse) ConvertToAPIUser() (APIUser, error) { return ret, nil } -type Entry struct { +type APIv1Entry struct { EntryID string `json:"entryId"` SortIndex int64 `json:"sortIndex,string"` Content struct { @@ -769,17 +769,13 @@ type Entry struct { } `json:"content"` } -func (e Entry) GetTombstoneText() string { - return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text -} - -type SortableEntries []Entry +type SortableEntries []APIv1Entry func (e SortableEntries) Len() int { return len(e) } func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex } -type TweetResponse struct { +type APIv1Response struct { GlobalObjects struct { Tweets map[string]APITweet `json:"tweets"` Users map[string]APIUser `json:"users"` @@ -791,7 +787,7 @@ type TweetResponse struct { Entries SortableEntries `json:"entries"` } `json:"addEntries"` ReplaceEntry struct { - Entry Entry + Entry APIv1Entry } `json:"replaceEntry"` MarkEntriesUnreadGreaterThanSortIndex struct { SortIndex int64 `json:"sortIndex,string"` @@ -819,11 +815,9 @@ var tombstone_types = map[string]string{ "This Post is from an account that no longer exists. Learn more": "no longer exists", } -/** - * Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to - * be fetched for tombstones. - */ -func (t *TweetResponse) HandleTombstones() []UserHandle { +// Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to +// be fetched for tombstones. +func (t *APIv1Response) HandleTombstones() []UserHandle { ret := []UserHandle{} // Handle tombstones in quote-tweets @@ -857,7 +851,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { entries := t.Timeline.Instructions[0].AddEntries.Entries sort.Sort(entries) for i, entry := range entries { - if entry.GetTombstoneText() != "" { + if entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text != "" { // Try to reconstruct the tombstone tweet var tombstoned_tweet APITweet tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones @@ -880,9 +874,10 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { tombstoned_tweet.InReplyToStatusID = prev_tweet_id } - short_text, ok := tombstone_types[entry.GetTombstoneText()] + short_text, ok := tombstone_types[entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text] if !ok { - panic(fmt.Errorf("Unknown tombstone text %q:\n %w", entry.GetTombstoneText(), EXTERNAL_API_ERROR)) + panic(fmt.Errorf("Unknown tombstone text %q:\n %w", + entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text, EXTERNAL_API_ERROR)) } tombstoned_tweet.TombstoneText = short_text @@ -894,7 +889,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { return ret } -func (t *TweetResponse) GetCursor() string { +func (t *APIv1Response) GetCursorBottom() string { // TODO: is this function used anywhere other than Notifications? for _, instr := range t.Timeline.Instructions { if len(instr.AddEntries.Entries) > 0 { @@ -914,7 +909,7 @@ func (t *TweetResponse) GetCursor() string { return "" } -func (t *TweetResponse) GetCursorTop() string { +func (t *APIv1Response) GetCursorTop() string { for _, instr := range t.Timeline.Instructions { for _, entry := range instr.AddEntries.Entries { if strings.Contains(entry.EntryID, "cursor-top") { @@ -925,13 +920,11 @@ func (t *TweetResponse) GetCursorTop() string { return "" } -/** - * Test for one case of end-of-feed. Cursor increments on each request for some reason, but - * there's no new content. This seems to happen when there's a pinned tweet. - * - * In this case, we look for an "entries" object that has only cursors in it, and no tweets. - */ -func (t *TweetResponse) IsEndOfFeed() bool { +// Test for one case of end-of-feed. Cursor increments on each request for some reason, but +// there's no new content. This seems to happen when there's a pinned tweet. +// +// In this case, we look for an "entries" object that has only cursors in it, and no tweets. +func (t *APIv1Response) IsEndOfFeed() bool { for _, instr := range t.Timeline.Instructions { entries := instr.AddEntries.Entries if len(entries) == 0 { @@ -949,7 +942,7 @@ func (t *TweetResponse) IsEndOfFeed() bool { return true } -func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) { +func (t *APIv1Response) ToTweetTrove() (TweetTrove, error) { ret := NewTweetTrove() for _, single_tweet := range t.GlobalObjects.Tweets { diff --git a/pkg/scraper/api_types_notifications.go b/pkg/scraper/api_types_notifications.go index 3c1701c..8165093 100644 --- a/pkg/scraper/api_types_notifications.go +++ b/pkg/scraper/api_types_notifications.go @@ -13,7 +13,7 @@ import ( ) // TODO: pagination -func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) { +func (api *API) GetNotificationsPage(cursor string) (APIv1Response, error) { url, err := url.Parse("https://api.twitter.com/2/notifications/all.json") if err != nil { panic(err) @@ -23,7 +23,7 @@ func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) { add_tweet_query_params(&query) url.RawQuery = query.Encode() - var result TweetResponse + var result APIv1Response err = api.do_http(url.String(), cursor, &result) return result, err @@ -41,7 +41,7 @@ func (api *API) GetNotifications(how_many int) (TweetTrove, int64, error) { } for len(trove.Notifications) < how_many { - resp, err = api.GetNotificationsPage(resp.GetCursor()) + resp, err = api.GetNotificationsPage(resp.GetCursorBottom()) if errors.Is(err, ErrRateLimited) { log.Warnf("Rate limited!") break @@ -86,7 +86,7 @@ func (api *API) MarkNotificationsAsRead() error { } // Check a Notifications result for unread notifications. Returns `0` if there are none. -func (t TweetResponse) CheckUnreadNotifications() int64 { +func (t APIv1Response) CheckUnreadNotifications() int64 { for _, instr := range t.Timeline.Instructions { if instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex != 0 { return instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex @@ -129,7 +129,7 @@ func (api *API) GetNotificationDetailForAll(trove TweetTrove, to_scrape []Notifi return trove, nil } -func (t *TweetResponse) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) { +func (t *APIv1Response) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) { ret, err := t.ToTweetTrove() if err != nil { return TweetTrove{}, err @@ -251,7 +251,7 @@ func ParseSingleNotification(n APINotification) Notification { return ret } -func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) { +func (api *API) GetNotificationDetail(n Notification) (APIv1Response, error) { url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID)) if err != nil { panic(err) @@ -261,13 +261,13 @@ func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) { add_tweet_query_params(&query) url.RawQuery = query.Encode() - var result TweetResponse + var result APIv1Response err = api.do_http(url.String(), "", &result) return result, err } -func (t *TweetResponse) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) { +func (t *APIv1Response) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) { ids := []TweetID{} ret, err := t.ToTweetTrove() if err != nil { diff --git a/pkg/scraper/api_types_notifications_test.go b/pkg/scraper/api_types_notifications_test.go index 36bee27..62ec7f1 100644 --- a/pkg/scraper/api_types_notifications_test.go +++ b/pkg/scraper/api_types_notifications_test.go @@ -18,7 +18,7 @@ func TestParseNotificationsPage(t *testing.T) { data, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json") require.NoError(err) - var resp TweetResponse + var resp APIv1Response err = json.Unmarshal(data, &resp) require.NoError(err) @@ -164,7 +164,7 @@ func TestParseNotificationsPage(t *testing.T) { assert.Equal(int64(1724566381021), resp.CheckUnreadNotifications()) // Test cursor-bottom - bottom_cursor := resp.GetCursor() + bottom_cursor := resp.GetCursorBottom() assert.Equal("DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA", bottom_cursor) assert.False(resp.IsEndOfFeed()) @@ -178,7 +178,7 @@ func TestParseNotificationsEndOfFeed(t *testing.T) { data, err := os.ReadFile("test_responses/notifications/notifications_end_of_feed.json") require.NoError(err) - var resp TweetResponse + var resp APIv1Response err = json.Unmarshal(data, &resp) require.NoError(err) @@ -191,7 +191,7 @@ func TestParseNotificationDetail(t *testing.T) { data, err := os.ReadFile("test_responses/notifications/notification_detail.json") require.NoError(err) - var resp TweetResponse + var resp APIv1Response err = json.Unmarshal(data, &resp) require.NoError(err) diff --git a/pkg/scraper/api_types_test.go b/pkg/scraper/api_types_test.go index 01dc7a9..44ea95c 100644 --- a/pkg/scraper/api_types_test.go +++ b/pkg/scraper/api_types_test.go @@ -75,18 +75,18 @@ func TestUserProfileToAPIUser(t *testing.T) { assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount) } -func TestGetCursor(t *testing.T) { +func TestGetCursorBottom(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json") if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) assert.NoError(err) assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=", - tweet_resp.GetCursor()) + tweet_resp.GetCursorBottom()) } func TestIsEndOfFeed(t *testing.T) { @@ -103,7 +103,7 @@ func TestIsEndOfFeed(t *testing.T) { if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) assert.NoError(err) assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed()) @@ -116,7 +116,7 @@ func TestHandleTombstonesHidden(t *testing.T) { if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err) assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") @@ -146,7 +146,7 @@ func TestHandleTombstonesDeleted(t *testing.T) { if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err) assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") @@ -169,7 +169,7 @@ func TestHandleTombstonesUnavailable(t *testing.T) { if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err) assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") diff --git a/pkg/scraper/tweet_test.go b/pkg/scraper/tweet_test.go index 503f9a5..ecf30a8 100644 --- a/pkg/scraper/tweet_test.go +++ b/pkg/scraper/tweet_test.go @@ -217,13 +217,13 @@ func TestTweetWithSpace(t *testing.T) { assert.False(s.IsDetailsFetched) } -func TestParseTweetResponse(t *testing.T) { +func TestParseAPIv1Response(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/michael_malice_feed.json") if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err) @@ -235,13 +235,13 @@ func TestParseTweetResponse(t *testing.T) { assert.Len(trove.Users, 9) } -func TestParseTweetResponseWithTombstones(t *testing.T) { +func TestParseAPIv1ResponseWithTombstones(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json") if err != nil { panic(err) } - var tweet_resp TweetResponse + var tweet_resp APIv1Response err = json.Unmarshal(data, &tweet_resp) require.NoError(t, err)