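REFACTOR: rename APIv1 types and methods in the scraper

- rename TweetResponse -> APIv1Response
- rename Entry -> APIv1Entry
- rename APIv1Response.GetCursor -> GetCursorBottom
- remove Entry.GetTombstoneText; HandleTombstones now reads the tombstone text field directly
- convert the block doc comments on HandleTombstones and IsEndOfFeed to line comments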
This commit is contained in:
Alessio 2024-12-23 12:37:28 -08:00
parent eaa9f4c404
commit 81e6dc50be
6 changed files with 50 additions and 57 deletions

View File

@ -13,7 +13,7 @@ import (
const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/" const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/" const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) { func (api API) GetFeedFor(user_id UserID, cursor string) (APIv1Response, error) {
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id)) url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id))
if err != nil { if err != nil {
panic(err) panic(err)
@ -22,7 +22,7 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error)
add_tweet_query_params(&queryParams) add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode() url.RawQuery = queryParams.Encode()
var result TweetResponse var result APIv1Response
err = api.do_http(url.String(), cursor, &result) err = api.do_http(url.String(), cursor, &result)
return result, err return result, err
@ -33,10 +33,10 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error)
* *
* args: * args:
* - user_id: the user's UserID * - user_id: the user's UserID
* - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to * - response: an "out" parameter; the APIv1Response that tweets, RTs and users will be appended to
* - min_tweets: the desired minimum amount of tweets to get * - min_tweets: the desired minimum amount of tweets to get
*/ */
func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error { func (api API) GetMoreTweetsFromFeed(user_id UserID, response *APIv1Response, min_tweets int) error {
last_response := response last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets { for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets {
fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor()) fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor())
@ -121,7 +121,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
} }
} }
// This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list) // This has to be called BEFORE ToTweetTrove, because it modifies the APIv1Response (adds tombstone tweets to its tweets list)
tombstoned_users := tweet_response.HandleTombstones() tombstoned_users := tweet_response.HandleTombstones()
trove, err = tweet_response.ToTweetTrove() trove, err = tweet_response.ToTweetTrove()
@ -150,7 +150,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
return return
} }
func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) { func (api *API) GetTweet(id TweetID, cursor string) (APIv1Response, error) {
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id)) url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id))
if err != nil { if err != nil {
panic(err) panic(err)
@ -162,13 +162,13 @@ func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) {
add_tweet_query_params(&queryParams) add_tweet_query_params(&queryParams)
url.RawQuery = queryParams.Encode() url.RawQuery = queryParams.Encode()
var result TweetResponse var result APIv1Response
err = api.do_http(url.String(), cursor, &result) err = api.do_http(url.String(), cursor, &result)
return result, err return result, err
} }
// Resend the request to get more replies if necessary // Resend the request to get more replies if necessary
func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error { func (api *API) GetMoreReplies(tweet_id TweetID, response *APIv1Response, max_replies int) error {
last_response := response last_response := response
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies { for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies {
fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor()) fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor())

View File

@ -735,7 +735,7 @@ func (u UserResponse) ConvertToAPIUser() (APIUser, error) {
return ret, nil return ret, nil
} }
type Entry struct { type APIv1Entry struct {
EntryID string `json:"entryId"` EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"` SortIndex int64 `json:"sortIndex,string"`
Content struct { Content struct {
@ -769,17 +769,13 @@ type Entry struct {
} `json:"content"` } `json:"content"`
} }
func (e Entry) GetTombstoneText() string { type SortableEntries []APIv1Entry
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
}
type SortableEntries []Entry
func (e SortableEntries) Len() int { return len(e) } func (e SortableEntries) Len() int { return len(e) }
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex } func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
type TweetResponse struct { type APIv1Response struct {
GlobalObjects struct { GlobalObjects struct {
Tweets map[string]APITweet `json:"tweets"` Tweets map[string]APITweet `json:"tweets"`
Users map[string]APIUser `json:"users"` Users map[string]APIUser `json:"users"`
@ -791,7 +787,7 @@ type TweetResponse struct {
Entries SortableEntries `json:"entries"` Entries SortableEntries `json:"entries"`
} `json:"addEntries"` } `json:"addEntries"`
ReplaceEntry struct { ReplaceEntry struct {
Entry Entry Entry APIv1Entry
} `json:"replaceEntry"` } `json:"replaceEntry"`
MarkEntriesUnreadGreaterThanSortIndex struct { MarkEntriesUnreadGreaterThanSortIndex struct {
SortIndex int64 `json:"sortIndex,string"` SortIndex int64 `json:"sortIndex,string"`
@ -819,11 +815,9 @@ var tombstone_types = map[string]string{
"This Post is from an account that no longer exists. Learn more": "no longer exists", "This Post is from an account that no longer exists. Learn more": "no longer exists",
} }
/** // Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to // be fetched for tombstones.
* be fetched for tombstones. func (t *APIv1Response) HandleTombstones() []UserHandle {
*/
func (t *TweetResponse) HandleTombstones() []UserHandle {
ret := []UserHandle{} ret := []UserHandle{}
// Handle tombstones in quote-tweets // Handle tombstones in quote-tweets
@ -857,7 +851,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
entries := t.Timeline.Instructions[0].AddEntries.Entries entries := t.Timeline.Instructions[0].AddEntries.Entries
sort.Sort(entries) sort.Sort(entries)
for i, entry := range entries { for i, entry := range entries {
if entry.GetTombstoneText() != "" { if entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text != "" {
// Try to reconstruct the tombstone tweet // Try to reconstruct the tombstone tweet
var tombstoned_tweet APITweet var tombstoned_tweet APITweet
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
@ -880,9 +874,10 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
tombstoned_tweet.InReplyToStatusID = prev_tweet_id tombstoned_tweet.InReplyToStatusID = prev_tweet_id
} }
short_text, ok := tombstone_types[entry.GetTombstoneText()] short_text, ok := tombstone_types[entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text]
if !ok { if !ok {
panic(fmt.Errorf("Unknown tombstone text %q:\n %w", entry.GetTombstoneText(), EXTERNAL_API_ERROR)) panic(fmt.Errorf("Unknown tombstone text %q:\n %w",
entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text, EXTERNAL_API_ERROR))
} }
tombstoned_tweet.TombstoneText = short_text tombstoned_tweet.TombstoneText = short_text
@ -894,7 +889,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
return ret return ret
} }
func (t *TweetResponse) GetCursor() string { func (t *APIv1Response) GetCursorBottom() string {
// TODO: is this function used anywhere other than Notifications? // TODO: is this function used anywhere other than Notifications?
for _, instr := range t.Timeline.Instructions { for _, instr := range t.Timeline.Instructions {
if len(instr.AddEntries.Entries) > 0 { if len(instr.AddEntries.Entries) > 0 {
@ -914,7 +909,7 @@ func (t *TweetResponse) GetCursor() string {
return "" return ""
} }
func (t *TweetResponse) GetCursorTop() string { func (t *APIv1Response) GetCursorTop() string {
for _, instr := range t.Timeline.Instructions { for _, instr := range t.Timeline.Instructions {
for _, entry := range instr.AddEntries.Entries { for _, entry := range instr.AddEntries.Entries {
if strings.Contains(entry.EntryID, "cursor-top") { if strings.Contains(entry.EntryID, "cursor-top") {
@ -925,13 +920,11 @@ func (t *TweetResponse) GetCursorTop() string {
return "" return ""
} }
/** // Test for one case of end-of-feed. Cursor increments on each request for some reason, but
* Test for one case of end-of-feed. Cursor increments on each request for some reason, but // there's no new content. This seems to happen when there's a pinned tweet.
* there's no new content. This seems to happen when there's a pinned tweet. //
* // In this case, we look for an "entries" object that has only cursors in it, and no tweets.
* In this case, we look for an "entries" object that has only cursors in it, and no tweets. func (t *APIv1Response) IsEndOfFeed() bool {
*/
func (t *TweetResponse) IsEndOfFeed() bool {
for _, instr := range t.Timeline.Instructions { for _, instr := range t.Timeline.Instructions {
entries := instr.AddEntries.Entries entries := instr.AddEntries.Entries
if len(entries) == 0 { if len(entries) == 0 {
@ -949,7 +942,7 @@ func (t *TweetResponse) IsEndOfFeed() bool {
return true return true
} }
func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) { func (t *APIv1Response) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove() ret := NewTweetTrove()
for _, single_tweet := range t.GlobalObjects.Tweets { for _, single_tweet := range t.GlobalObjects.Tweets {

View File

@ -13,7 +13,7 @@ import (
) )
// TODO: pagination // TODO: pagination
func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) { func (api *API) GetNotificationsPage(cursor string) (APIv1Response, error) {
url, err := url.Parse("https://api.twitter.com/2/notifications/all.json") url, err := url.Parse("https://api.twitter.com/2/notifications/all.json")
if err != nil { if err != nil {
panic(err) panic(err)
@ -23,7 +23,7 @@ func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) {
add_tweet_query_params(&query) add_tweet_query_params(&query)
url.RawQuery = query.Encode() url.RawQuery = query.Encode()
var result TweetResponse var result APIv1Response
err = api.do_http(url.String(), cursor, &result) err = api.do_http(url.String(), cursor, &result)
return result, err return result, err
@ -41,7 +41,7 @@ func (api *API) GetNotifications(how_many int) (TweetTrove, int64, error) {
} }
for len(trove.Notifications) < how_many { for len(trove.Notifications) < how_many {
resp, err = api.GetNotificationsPage(resp.GetCursor()) resp, err = api.GetNotificationsPage(resp.GetCursorBottom())
if errors.Is(err, ErrRateLimited) { if errors.Is(err, ErrRateLimited) {
log.Warnf("Rate limited!") log.Warnf("Rate limited!")
break break
@ -86,7 +86,7 @@ func (api *API) MarkNotificationsAsRead() error {
} }
// Check a Notifications result for unread notifications. Returns `0` if there are none. // Check a Notifications result for unread notifications. Returns `0` if there are none.
func (t TweetResponse) CheckUnreadNotifications() int64 { func (t APIv1Response) CheckUnreadNotifications() int64 {
for _, instr := range t.Timeline.Instructions { for _, instr := range t.Timeline.Instructions {
if instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex != 0 { if instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex != 0 {
return instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex return instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex
@ -129,7 +129,7 @@ func (api *API) GetNotificationDetailForAll(trove TweetTrove, to_scrape []Notifi
return trove, nil return trove, nil
} }
func (t *TweetResponse) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) { func (t *APIv1Response) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) {
ret, err := t.ToTweetTrove() ret, err := t.ToTweetTrove()
if err != nil { if err != nil {
return TweetTrove{}, err return TweetTrove{}, err
@ -251,7 +251,7 @@ func ParseSingleNotification(n APINotification) Notification {
return ret return ret
} }
func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) { func (api *API) GetNotificationDetail(n Notification) (APIv1Response, error) {
url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID)) url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID))
if err != nil { if err != nil {
panic(err) panic(err)
@ -261,13 +261,13 @@ func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) {
add_tweet_query_params(&query) add_tweet_query_params(&query)
url.RawQuery = query.Encode() url.RawQuery = query.Encode()
var result TweetResponse var result APIv1Response
err = api.do_http(url.String(), "", &result) err = api.do_http(url.String(), "", &result)
return result, err return result, err
} }
func (t *TweetResponse) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) { func (t *APIv1Response) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) {
ids := []TweetID{} ids := []TweetID{}
ret, err := t.ToTweetTrove() ret, err := t.ToTweetTrove()
if err != nil { if err != nil {

View File

@ -18,7 +18,7 @@ func TestParseNotificationsPage(t *testing.T) {
data, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json") data, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json")
require.NoError(err) require.NoError(err)
var resp TweetResponse var resp APIv1Response
err = json.Unmarshal(data, &resp) err = json.Unmarshal(data, &resp)
require.NoError(err) require.NoError(err)
@ -164,7 +164,7 @@ func TestParseNotificationsPage(t *testing.T) {
assert.Equal(int64(1724566381021), resp.CheckUnreadNotifications()) assert.Equal(int64(1724566381021), resp.CheckUnreadNotifications())
// Test cursor-bottom // Test cursor-bottom
bottom_cursor := resp.GetCursor() bottom_cursor := resp.GetCursorBottom()
assert.Equal("DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA", bottom_cursor) assert.Equal("DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA", bottom_cursor)
assert.False(resp.IsEndOfFeed()) assert.False(resp.IsEndOfFeed())
@ -178,7 +178,7 @@ func TestParseNotificationsEndOfFeed(t *testing.T) {
data, err := os.ReadFile("test_responses/notifications/notifications_end_of_feed.json") data, err := os.ReadFile("test_responses/notifications/notifications_end_of_feed.json")
require.NoError(err) require.NoError(err)
var resp TweetResponse var resp APIv1Response
err = json.Unmarshal(data, &resp) err = json.Unmarshal(data, &resp)
require.NoError(err) require.NoError(err)
@ -191,7 +191,7 @@ func TestParseNotificationDetail(t *testing.T) {
data, err := os.ReadFile("test_responses/notifications/notification_detail.json") data, err := os.ReadFile("test_responses/notifications/notification_detail.json")
require.NoError(err) require.NoError(err)
var resp TweetResponse var resp APIv1Response
err = json.Unmarshal(data, &resp) err = json.Unmarshal(data, &resp)
require.NoError(err) require.NoError(err)

View File

@ -75,18 +75,18 @@ func TestUserProfileToAPIUser(t *testing.T) {
assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount) assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount)
} }
func TestGetCursor(t *testing.T) { func TestGetCursorBottom(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json") data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
assert.NoError(err) assert.NoError(err)
assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=", assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=",
tweet_resp.GetCursor()) tweet_resp.GetCursorBottom())
} }
func TestIsEndOfFeed(t *testing.T) { func TestIsEndOfFeed(t *testing.T) {
@ -103,7 +103,7 @@ func TestIsEndOfFeed(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
assert.NoError(err) assert.NoError(err)
assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed()) assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed())
@ -116,7 +116,7 @@ func TestHandleTombstonesHidden(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
@ -146,7 +146,7 @@ func TestHandleTombstonesDeleted(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
@ -169,7 +169,7 @@ func TestHandleTombstonesUnavailable(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")

View File

@ -217,13 +217,13 @@ func TestTweetWithSpace(t *testing.T) {
assert.False(s.IsDetailsFetched) assert.False(s.IsDetailsFetched)
} }
func TestParseTweetResponse(t *testing.T) { func TestParseAPIv1Response(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_feed.json") data, err := os.ReadFile("test_responses/michael_malice_feed.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
require.NoError(t, err) require.NoError(t, err)
@ -235,13 +235,13 @@ func TestParseTweetResponse(t *testing.T) {
assert.Len(trove.Users, 9) assert.Len(trove.Users, 9)
} }
func TestParseTweetResponseWithTombstones(t *testing.T) { func TestParseAPIv1ResponseWithTombstones(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json") data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp TweetResponse var tweet_resp APIv1Response
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
require.NoError(t, err) require.NoError(t, err)