REFACTOR: rename the response and entry types (and the bottom-cursor getter) in scraper APIv1
- rename TweetResponse -> APIv1Response
- rename Entry -> APIv1Entry
- rename APIv1Response.GetCursor -> GetCursorBottom
This commit is contained in:
parent
eaa9f4c404
commit
81e6dc50be
@ -13,7 +13,7 @@ import (
|
||||
const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
|
||||
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"
|
||||
|
||||
func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
|
||||
func (api API) GetFeedFor(user_id UserID, cursor string) (APIv1Response, error) {
|
||||
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_USER_TIMELINE_BASE_PATH, user_id))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -22,7 +22,7 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error)
|
||||
add_tweet_query_params(&queryParams)
|
||||
url.RawQuery = queryParams.Encode()
|
||||
|
||||
var result TweetResponse
|
||||
var result APIv1Response
|
||||
err = api.do_http(url.String(), cursor, &result)
|
||||
|
||||
return result, err
|
||||
@ -33,10 +33,10 @@ func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error)
|
||||
*
|
||||
* args:
|
||||
* - user_id: the user's UserID
|
||||
* - response: an "out" parameter; the TweetResponse that tweets, RTs and users will be appended to
|
||||
* - response: an "out" parameter; the APIv1Response that tweets, RTs and users will be appended to
|
||||
* - min_tweets: the desired minimum amount of tweets to get
|
||||
*/
|
||||
func (api API) GetMoreTweetsFromFeed(user_id UserID, response *TweetResponse, min_tweets int) error {
|
||||
func (api API) GetMoreTweetsFromFeed(user_id UserID, response *APIv1Response, min_tweets int) error {
|
||||
last_response := response
|
||||
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < min_tweets {
|
||||
fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor())
|
||||
@ -121,7 +121,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// This has to be called BEFORE ToTweetTrove, because it modifies the TweetResponse (adds tombstone tweets to its tweets list)
|
||||
// This has to be called BEFORE ToTweetTrove, because it modifies the APIv1Response (adds tombstone tweets to its tweets list)
|
||||
tombstoned_users := tweet_response.HandleTombstones()
|
||||
|
||||
trove, err = tweet_response.ToTweetTrove()
|
||||
@ -150,7 +150,7 @@ func GetTweetFull(id TweetID, how_many int) (trove TweetTrove, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) {
|
||||
func (api *API) GetTweet(id TweetID, cursor string) (APIv1Response, error) {
|
||||
url, err := url.Parse(fmt.Sprintf("%s%d.json", API_CONVERSATION_BASE_PATH, id))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -162,13 +162,13 @@ func (api *API) GetTweet(id TweetID, cursor string) (TweetResponse, error) {
|
||||
add_tweet_query_params(&queryParams)
|
||||
url.RawQuery = queryParams.Encode()
|
||||
|
||||
var result TweetResponse
|
||||
var result APIv1Response
|
||||
err = api.do_http(url.String(), cursor, &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
// Resend the request to get more replies if necessary
|
||||
func (api *API) GetMoreReplies(tweet_id TweetID, response *TweetResponse, max_replies int) error {
|
||||
func (api *API) GetMoreReplies(tweet_id TweetID, response *APIv1Response, max_replies int) error {
|
||||
last_response := response
|
||||
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies {
|
||||
fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor())
|
||||
|
@ -735,7 +735,7 @@ func (u UserResponse) ConvertToAPIUser() (APIUser, error) {
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
type APIv1Entry struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
@ -769,17 +769,13 @@ type Entry struct {
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
func (e Entry) GetTombstoneText() string {
|
||||
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
|
||||
}
|
||||
|
||||
type SortableEntries []Entry
|
||||
type SortableEntries []APIv1Entry
|
||||
|
||||
// Len returns the number of entries in the slice.
func (e SortableEntries) Len() int { return len(e) }
|
||||
// Swap exchanges the entries at indexes i and j in place.
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
|
||||
// Less reports whether the entry at index i must come before the entry at index j.
// The comparison is `>`, so an entry with a larger SortIndex sorts first: sorting
// yields entries in descending SortIndex order.
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
|
||||
|
||||
type TweetResponse struct {
|
||||
type APIv1Response struct {
|
||||
GlobalObjects struct {
|
||||
Tweets map[string]APITweet `json:"tweets"`
|
||||
Users map[string]APIUser `json:"users"`
|
||||
@ -791,7 +787,7 @@ type TweetResponse struct {
|
||||
Entries SortableEntries `json:"entries"`
|
||||
} `json:"addEntries"`
|
||||
ReplaceEntry struct {
|
||||
Entry Entry
|
||||
Entry APIv1Entry
|
||||
} `json:"replaceEntry"`
|
||||
MarkEntriesUnreadGreaterThanSortIndex struct {
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
@ -819,11 +815,9 @@ var tombstone_types = map[string]string{
|
||||
"This Post is from an account that no longer exists. Learn more": "no longer exists",
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
|
||||
* be fetched for tombstones.
|
||||
*/
|
||||
func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
// Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
|
||||
// be fetched for tombstones.
|
||||
func (t *APIv1Response) HandleTombstones() []UserHandle {
|
||||
ret := []UserHandle{}
|
||||
|
||||
// Handle tombstones in quote-tweets
|
||||
@ -857,7 +851,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||
sort.Sort(entries)
|
||||
for i, entry := range entries {
|
||||
if entry.GetTombstoneText() != "" {
|
||||
if entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text != "" {
|
||||
// Try to reconstruct the tombstone tweet
|
||||
var tombstoned_tweet APITweet
|
||||
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
|
||||
@ -880,9 +874,10 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
tombstoned_tweet.InReplyToStatusID = prev_tweet_id
|
||||
}
|
||||
|
||||
short_text, ok := tombstone_types[entry.GetTombstoneText()]
|
||||
short_text, ok := tombstone_types[entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text]
|
||||
if !ok {
|
||||
panic(fmt.Errorf("Unknown tombstone text %q:\n %w", entry.GetTombstoneText(), EXTERNAL_API_ERROR))
|
||||
panic(fmt.Errorf("Unknown tombstone text %q:\n %w",
|
||||
entry.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text, EXTERNAL_API_ERROR))
|
||||
}
|
||||
tombstoned_tweet.TombstoneText = short_text
|
||||
|
||||
@ -894,7 +889,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
return ret
|
||||
}
|
||||
|
||||
func (t *TweetResponse) GetCursor() string {
|
||||
func (t *APIv1Response) GetCursorBottom() string {
|
||||
// TODO: is this function used anywhere other than Notifications?
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
if len(instr.AddEntries.Entries) > 0 {
|
||||
@ -914,7 +909,7 @@ func (t *TweetResponse) GetCursor() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (t *TweetResponse) GetCursorTop() string {
|
||||
func (t *APIv1Response) GetCursorTop() string {
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
for _, entry := range instr.AddEntries.Entries {
|
||||
if strings.Contains(entry.EntryID, "cursor-top") {
|
||||
@ -925,13 +920,11 @@ func (t *TweetResponse) GetCursorTop() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for one case of end-of-feed. Cursor increments on each request for some reason, but
|
||||
* there's no new content. This seems to happen when there's a pinned tweet.
|
||||
*
|
||||
* In this case, we look for an "entries" object that has only cursors in it, and no tweets.
|
||||
*/
|
||||
func (t *TweetResponse) IsEndOfFeed() bool {
|
||||
// Test for one case of end-of-feed. Cursor increments on each request for some reason, but
|
||||
// there's no new content. This seems to happen when there's a pinned tweet.
|
||||
//
|
||||
// In this case, we look for an "entries" object that has only cursors in it, and no tweets.
|
||||
func (t *APIv1Response) IsEndOfFeed() bool {
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
entries := instr.AddEntries.Entries
|
||||
if len(entries) == 0 {
|
||||
@ -949,7 +942,7 @@ func (t *TweetResponse) IsEndOfFeed() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
|
||||
func (t *APIv1Response) ToTweetTrove() (TweetTrove, error) {
|
||||
ret := NewTweetTrove()
|
||||
|
||||
for _, single_tweet := range t.GlobalObjects.Tweets {
|
||||
|
@ -13,7 +13,7 @@ import (
|
||||
)
|
||||
|
||||
// TODO: pagination
|
||||
func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) {
|
||||
func (api *API) GetNotificationsPage(cursor string) (APIv1Response, error) {
|
||||
url, err := url.Parse("https://api.twitter.com/2/notifications/all.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -23,7 +23,7 @@ func (api *API) GetNotificationsPage(cursor string) (TweetResponse, error) {
|
||||
add_tweet_query_params(&query)
|
||||
url.RawQuery = query.Encode()
|
||||
|
||||
var result TweetResponse
|
||||
var result APIv1Response
|
||||
err = api.do_http(url.String(), cursor, &result)
|
||||
|
||||
return result, err
|
||||
@ -41,7 +41,7 @@ func (api *API) GetNotifications(how_many int) (TweetTrove, int64, error) {
|
||||
}
|
||||
|
||||
for len(trove.Notifications) < how_many {
|
||||
resp, err = api.GetNotificationsPage(resp.GetCursor())
|
||||
resp, err = api.GetNotificationsPage(resp.GetCursorBottom())
|
||||
if errors.Is(err, ErrRateLimited) {
|
||||
log.Warnf("Rate limited!")
|
||||
break
|
||||
@ -86,7 +86,7 @@ func (api *API) MarkNotificationsAsRead() error {
|
||||
}
|
||||
|
||||
// Check a Notifications result for unread notifications. Returns `0` if there are none.
|
||||
func (t TweetResponse) CheckUnreadNotifications() int64 {
|
||||
func (t APIv1Response) CheckUnreadNotifications() int64 {
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
if instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex != 0 {
|
||||
return instr.MarkEntriesUnreadGreaterThanSortIndex.SortIndex
|
||||
@ -129,7 +129,7 @@ func (api *API) GetNotificationDetailForAll(trove TweetTrove, to_scrape []Notifi
|
||||
return trove, nil
|
||||
}
|
||||
|
||||
func (t *TweetResponse) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) {
|
||||
func (t *APIv1Response) ToTweetTroveAsNotifications(current_user_id UserID) (TweetTrove, error) {
|
||||
ret, err := t.ToTweetTrove()
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
@ -251,7 +251,7 @@ func ParseSingleNotification(n APINotification) Notification {
|
||||
return ret
|
||||
}
|
||||
|
||||
func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) {
|
||||
func (api *API) GetNotificationDetail(n Notification) (APIv1Response, error) {
|
||||
url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -261,13 +261,13 @@ func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) {
|
||||
add_tweet_query_params(&query)
|
||||
url.RawQuery = query.Encode()
|
||||
|
||||
var result TweetResponse
|
||||
var result APIv1Response
|
||||
err = api.do_http(url.String(), "", &result)
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TweetResponse) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) {
|
||||
func (t *APIv1Response) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) {
|
||||
ids := []TweetID{}
|
||||
ret, err := t.ToTweetTrove()
|
||||
if err != nil {
|
||||
|
@ -18,7 +18,7 @@ func TestParseNotificationsPage(t *testing.T) {
|
||||
data, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json")
|
||||
require.NoError(err)
|
||||
|
||||
var resp TweetResponse
|
||||
var resp APIv1Response
|
||||
err = json.Unmarshal(data, &resp)
|
||||
require.NoError(err)
|
||||
|
||||
@ -164,7 +164,7 @@ func TestParseNotificationsPage(t *testing.T) {
|
||||
assert.Equal(int64(1724566381021), resp.CheckUnreadNotifications())
|
||||
|
||||
// Test cursor-bottom
|
||||
bottom_cursor := resp.GetCursor()
|
||||
bottom_cursor := resp.GetCursorBottom()
|
||||
assert.Equal("DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA", bottom_cursor)
|
||||
assert.False(resp.IsEndOfFeed())
|
||||
|
||||
@ -178,7 +178,7 @@ func TestParseNotificationsEndOfFeed(t *testing.T) {
|
||||
data, err := os.ReadFile("test_responses/notifications/notifications_end_of_feed.json")
|
||||
require.NoError(err)
|
||||
|
||||
var resp TweetResponse
|
||||
var resp APIv1Response
|
||||
err = json.Unmarshal(data, &resp)
|
||||
require.NoError(err)
|
||||
|
||||
@ -191,7 +191,7 @@ func TestParseNotificationDetail(t *testing.T) {
|
||||
data, err := os.ReadFile("test_responses/notifications/notification_detail.json")
|
||||
require.NoError(err)
|
||||
|
||||
var resp TweetResponse
|
||||
var resp APIv1Response
|
||||
err = json.Unmarshal(data, &resp)
|
||||
require.NoError(err)
|
||||
|
||||
|
@ -75,18 +75,18 @@ func TestUserProfileToAPIUser(t *testing.T) {
|
||||
assert.Equal(user_resp.Data.User.Result.Legacy.FollowersCount, result.FollowersCount)
|
||||
}
|
||||
|
||||
func TestGetCursor(t *testing.T) {
|
||||
func TestGetCursorBottom(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
assert.NoError(err)
|
||||
|
||||
assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=",
|
||||
tweet_resp.GetCursor())
|
||||
tweet_resp.GetCursorBottom())
|
||||
}
|
||||
|
||||
func TestIsEndOfFeed(t *testing.T) {
|
||||
@ -103,7 +103,7 @@ func TestIsEndOfFeed(t *testing.T) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
assert.NoError(err)
|
||||
assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed())
|
||||
@ -116,7 +116,7 @@ func TestHandleTombstonesHidden(t *testing.T) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
|
||||
@ -146,7 +146,7 @@ func TestHandleTombstonesDeleted(t *testing.T) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
|
||||
@ -169,7 +169,7 @@ func TestHandleTombstonesUnavailable(t *testing.T) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
|
||||
|
@ -217,13 +217,13 @@ func TestTweetWithSpace(t *testing.T) {
|
||||
assert.False(s.IsDetailsFetched)
|
||||
}
|
||||
|
||||
func TestParseTweetResponse(t *testing.T) {
|
||||
func TestParseAPIv1Response(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
require.NoError(t, err)
|
||||
|
||||
@ -235,13 +235,13 @@ func TestParseTweetResponse(t *testing.T) {
|
||||
assert.Len(trove.Users, 9)
|
||||
}
|
||||
|
||||
func TestParseTweetResponseWithTombstones(t *testing.T) {
|
||||
func TestParseAPIv1ResponseWithTombstones(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var tweet_resp TweetResponse
|
||||
var tweet_resp APIv1Response
|
||||
err = json.Unmarshal(data, &tweet_resp)
|
||||
require.NoError(t, err)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user