Add scraping and saving of bookmarks

This commit is contained in:
Alessio 2024-05-28 21:55:42 -07:00
parent fc08fb0be4
commit 14a7f88cdb
13 changed files with 382 additions and 36 deletions

View File

@ -369,6 +369,10 @@ tw unlike_tweet https://twitter.com/elonmusk/status/1589023388676554753
# TODO: implement deleting a Like # TODO: implement deleting a Like
# test $(sqlite3 twitter.db "select count(*) from likes where tweet_id = 1589023388676554753 and user_id = (select id from users where handle like 'offline_twatter')") = "0" # test $(sqlite3 twitter.db "select count(*) from likes where tweet_id = 1589023388676554753 and user_id = (select id from users where handle like 'offline_twatter')") = "0"
# Test fetching bookmarks
tw get_bookmarks
test $(sqlite3 twitter.db "select count(*) from bookmarks") -ge "2"
test $(sqlite3 twitter.db "select count(*) from bookmarks where tweet_id = 1762239926437843421") = "1"
# Test fetch inbox # Test fetch inbox
test $(sqlite3 twitter.db "select count(*) from chat_rooms") = "0" test $(sqlite3 twitter.db "select count(*) from chat_rooms") = "0"
@ -377,6 +381,7 @@ tw fetch_inbox
test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1" test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1"
test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5" test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5"
# Test fetch a DM conversation # Test fetch a DM conversation
tw fetch_dm "1458284524761075714-1488963321701171204" tw fetch_dm "1458284524761075714-1488963321701171204"

View File

@ -76,7 +76,7 @@ func main() {
if len(args) < 2 { if len(args) < 2 {
if len(args) == 1 && (args[0] == "list_followed" || args[0] == "webserver" || args[0] == "fetch_timeline" || if len(args) == 1 && (args[0] == "list_followed" || args[0] == "webserver" || args[0] == "fetch_timeline" ||
args[0] == "fetch_timeline_following_only" || args[0] == "fetch_inbox") { args[0] == "fetch_timeline_following_only" || args[0] == "fetch_inbox" || args[0] == "get_bookmarks") {
// Doesn't need a target, so create a fake second arg // Doesn't need a target, so create a fake second arg
args = append(args, "") args = append(args, "")
} else { } else {
@ -167,6 +167,8 @@ func main() {
get_followers(target, *how_many) get_followers(target, *how_many)
case "get_followees": case "get_followees":
get_followees(target, *how_many) get_followees(target, *how_many)
case "get_bookmarks":
get_bookmarks(*how_many)
case "fetch_timeline": case "fetch_timeline":
fetch_timeline(false) // TODO: *how_many fetch_timeline(false) // TODO: *how_many
case "fetch_timeline_following_only": case "fetch_timeline_following_only":
@ -384,7 +386,18 @@ func get_followers(handle string, how_many int) {
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users))) happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)))
} }
// get_bookmarks scrapes the logged-in user's bookmarks (up to `how_many`)
// and saves the resulting trove to the active profile, then exits.
func get_bookmarks(how_many int) {
	trove, err := scraper.GetBookmarks(how_many)
	if err != nil {
		die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
	}
	profile.SaveTweetTrove(trove, true)

	summary := fmt.Sprintf(
		"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
		len(trove.Tweets), len(trove.Retweets), len(trove.Users), len(trove.Bookmarks),
	)
	happy_exit(summary)
}
func fetch_timeline(is_following_only bool) { func fetch_timeline(is_following_only bool) {
trove, err := scraper.GetHomeTimeline("", is_following_only) trove, err := scraper.GetHomeTimeline("", is_following_only)
if err != nil { if err != nil {

View File

@ -0,0 +1,42 @@
package persistence
import (
"fmt"
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// SaveBookmark upserts a Bookmark row.  If the (user_id, tweet_id) pair
// already exists, its stored sort_order is bumped to the larger of the old
// and new values.
func (p Profile) SaveBookmark(b scraper.Bookmark) error {
	if _, err := p.DB.NamedExec(`
		insert into bookmarks (sort_order, user_id, tweet_id)
		values (:sort_order, :user_id, :tweet_id)
		    on conflict do update set sort_order = max(sort_order, :sort_order)
	`, b); err != nil {
		return fmt.Errorf("Error executing SaveBookmark(%#v):\n %w", b, err)
	}
	return nil
}
// DeleteBookmark removes the bookmark row matching the given Bookmark's
// user_id and tweet_id.
func (p Profile) DeleteBookmark(b scraper.Bookmark) error {
	_, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, b)
	if err == nil {
		return nil
	}
	return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", b, err)
}
// GetBookmarkBySortID fetches the single Bookmark whose sort_order matches
// the given ID, returning an error if no such row exists.
func (p Profile) GetBookmarkBySortID(id scraper.BookmarkSortID) (scraper.Bookmark, error) {
	var ret scraper.Bookmark
	err := p.DB.Get(&ret, `
		select sort_order, user_id, tweet_id
		  from bookmarks
		 where sort_order = ?
	`, id)
	if err != nil {
		return ret, fmt.Errorf("Error executing GetBookmarkBySortID(%d):\n %w", id, err)
	}
	return ret, nil
}

View File

@ -0,0 +1,57 @@
package persistence_test
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/go-test/deep"
)
// Save a Bookmark, reload it, and check the round trip preserves it exactly.
// Saving it a second time (the upsert path) must also succeed and leave the
// stored row unchanged.
func TestSaveAndLoadBookmark(t *testing.T) {
	require := require.New(t)
	profile := create_or_load_profile("test_profiles/TestBookmarksQueries")

	bookmark := create_dummy_bookmark()
	require.NoError(profile.SaveBookmark(bookmark))

	// Reload the Bookmark; it should come back identical
	reloaded, err := profile.GetBookmarkBySortID(bookmark.SortID)
	require.NoError(err)
	if diff := deep.Equal(bookmark, reloaded); diff != nil {
		t.Error(diff)
	}

	// Double-save, then reload again; still identical
	require.NoError(profile.SaveBookmark(bookmark))
	reloaded, err = profile.GetBookmarkBySortID(bookmark.SortID)
	require.NoError(err)
	if diff := deep.Equal(bookmark, reloaded); diff != nil {
		t.Error(diff)
	}
}
// Saving then deleting a Bookmark should make it unfetchable.
func TestDeleteBookmark(t *testing.T) {
	require := require.New(t)
	profile := create_or_load_profile("test_profiles/TestBookmarksQueries")

	bookmark := create_dummy_bookmark()
	require.NoError(profile.SaveBookmark(bookmark))

	// Delete it; looking it up afterward should now fail
	require.NoError(profile.DeleteBookmark(bookmark))
	_, err := profile.GetBookmarkBySortID(bookmark.SortID)
	require.Error(err)
}

View File

@ -18,10 +18,11 @@ const (
SORT_ORDER_MOST_LIKES SORT_ORDER_MOST_LIKES
SORT_ORDER_MOST_RETWEETS SORT_ORDER_MOST_RETWEETS
SORT_ORDER_LIKED_AT SORT_ORDER_LIKED_AT
SORT_ORDER_BOOKMARKED_AT
) )
func (o SortOrder) String() string { func (o SortOrder) String() string {
return []string{"newest", "oldest", "most likes", "most retweets", "liked at"}[o] return []string{"newest", "oldest", "most likes", "most retweets", "liked at", "bookmarked at"}[o]
} }
func SortOrderFromString(s string) (SortOrder, bool) { func SortOrderFromString(s string) (SortOrder, bool) {
@ -31,6 +32,7 @@ func SortOrderFromString(s string) (SortOrder, bool) {
"most likes": SORT_ORDER_MOST_LIKES, "most likes": SORT_ORDER_MOST_LIKES,
"most retweets": SORT_ORDER_MOST_RETWEETS, "most retweets": SORT_ORDER_MOST_RETWEETS,
"liked at": SORT_ORDER_LIKED_AT, "liked at": SORT_ORDER_LIKED_AT,
"bookmarked at": SORT_ORDER_BOOKMARKED_AT,
}[s] }[s]
return result, is_ok // Have to store as temporary variable b/c otherwise it interprets it as single-value and compile fails return result, is_ok // Have to store as temporary variable b/c otherwise it interprets it as single-value and compile fails
} }
@ -47,6 +49,8 @@ func (o SortOrder) OrderByClause() string {
return "order by num_retweets desc" return "order by num_retweets desc"
case SORT_ORDER_LIKED_AT: case SORT_ORDER_LIKED_AT:
return "order by likes_sort_order desc" return "order by likes_sort_order desc"
case SORT_ORDER_BOOKMARKED_AT:
return "order by bookmarks_sort_order desc"
default: default:
panic(fmt.Sprintf("Invalid sort order: %d", o)) panic(fmt.Sprintf("Invalid sort order: %d", o))
} }
@ -63,6 +67,8 @@ func (o SortOrder) PaginationWhereClause() string {
return "num_retweets < ?" return "num_retweets < ?"
case SORT_ORDER_LIKED_AT: case SORT_ORDER_LIKED_AT:
return "likes_sort_order < ?" return "likes_sort_order < ?"
case SORT_ORDER_BOOKMARKED_AT:
return "bookmarks_sort_order < ?"
default: default:
panic(fmt.Sprintf("Invalid sort order: %d", o)) panic(fmt.Sprintf("Invalid sort order: %d", o))
} }
@ -79,6 +85,8 @@ func (o SortOrder) NextCursorValue(r CursorResult) int {
return r.NumRetweets return r.NumRetweets
case SORT_ORDER_LIKED_AT: case SORT_ORDER_LIKED_AT:
return r.LikeSortOrder return r.LikeSortOrder
case SORT_ORDER_BOOKMARKED_AT:
return r.BookmarkSortOrder
default: default:
panic(fmt.Sprintf("Invalid sort order: %d", o)) panic(fmt.Sprintf("Invalid sort order: %d", o))
} }
@ -125,9 +133,10 @@ const (
type CursorResult struct { type CursorResult struct {
scraper.Tweet scraper.Tweet
scraper.Retweet scraper.Retweet
Chrono int `db:"chrono"` Chrono int `db:"chrono"`
LikeSortOrder int `db:"likes_sort_order"` LikeSortOrder int `db:"likes_sort_order"`
ByUserID scraper.UserID `db:"by_user_id"` BookmarkSortOrder int `db:"bookmarks_sort_order"`
ByUserID scraper.UserID `db:"by_user_id"`
} }
type Cursor struct { type Cursor struct {
@ -137,26 +146,27 @@ type Cursor struct {
PageSize int PageSize int
// Search params // Search params
Keywords []string Keywords []string
FromUserHandle scraper.UserHandle // Tweeted by this user FromUserHandle scraper.UserHandle // Tweeted by this user
RetweetedByUserHandle scraper.UserHandle // Retweeted by this user RetweetedByUserHandle scraper.UserHandle // Retweeted by this user
ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user
ToUserHandles []scraper.UserHandle // In reply to these users ToUserHandles []scraper.UserHandle // In reply to these users
LikedByUserHandle scraper.UserHandle // Liked by this user LikedByUserHandle scraper.UserHandle // Liked by this user
ListID scraper.ListID // Either tweeted or retweeted by users from this List BookmarkedByUserHandle scraper.UserHandle // Bookmarked by this user
FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user ListID scraper.ListID // Either tweeted or retweeted by users from this List
SinceTimestamp scraper.Timestamp FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user
UntilTimestamp scraper.Timestamp SinceTimestamp scraper.Timestamp
TombstoneType string UntilTimestamp scraper.Timestamp
FilterLinks Filter TombstoneType string
FilterImages Filter FilterLinks Filter
FilterVideos Filter FilterImages Filter
FilterMedia Filter FilterVideos Filter
FilterPolls Filter FilterMedia Filter
FilterSpaces Filter FilterPolls Filter
FilterReplies Filter FilterSpaces Filter
FilterRetweets Filter FilterReplies Filter
FilterOfflineFollowed Filter FilterRetweets Filter
FilterOfflineFollowed Filter
} }
// Generate a cursor with some reasonable defaults // Generate a cursor with some reasonable defaults
@ -255,6 +265,22 @@ func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
} }
} }
// Generate a cursor for a User's Bookmarks
// NewUserFeedBookmarksCursor generates a cursor over the given User's
// Bookmarks, sorted by bookmarked-at order (SORT_ORDER_BOOKMARKED_AT),
// in pages of 50, starting from the beginning.
func NewUserFeedBookmarksCursor(h scraper.UserHandle) Cursor {
	return Cursor{
		Keywords:               []string{},
		ToUserHandles:          []scraper.UserHandle{},
		SinceTimestamp:         scraper.TimestampFromUnix(0),
		UntilTimestamp:         scraper.TimestampFromUnix(0),
		CursorPosition:         CURSOR_START,
		CursorValue:            0,
		SortOrder:              SORT_ORDER_BOOKMARKED_AT,
		PageSize:               50,
		BookmarkedByUserHandle: h,
	}
}
func NewCursorFromSearchQuery(q string) (Cursor, error) { func NewCursorFromSearchQuery(q string) (Cursor, error) {
ret := NewCursor() ret := NewCursor()
is_in_quotes := false is_in_quotes := false
@ -325,6 +351,8 @@ func (c *Cursor) apply_token(token string) error {
c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
case "liked_by": case "liked_by":
c.LikedByUserHandle = scraper.UserHandle(parts[1]) c.LikedByUserHandle = scraper.UserHandle(parts[1])
case "bookmarked_by":
c.BookmarkedByUserHandle = scraper.UserHandle(parts[1])
case "followed_by": case "followed_by":
c.FollowedByUserHandle = scraper.UserHandle(parts[1]) c.FollowedByUserHandle = scraper.UserHandle(parts[1])
case "list": case "list":
@ -513,6 +541,19 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
where_clauses = append(where_clauses, "retweet_id = 0") where_clauses = append(where_clauses, "retweet_id = 0")
} }
bookmarked_by_filter_join_clause := ""
bookmarks_sort_order_field := ""
if c.BookmarkedByUserHandle != "" {
bookmarked_by_filter_join_clause = " join bookmarks filter_bookmarks on tweets.id = filter_bookmarks.tweet_id "
where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users where handle like ?) ")
bind_values = append(bind_values, c.BookmarkedByUserHandle)
bookmarks_sort_order_field = ", coalesce(filter_bookmarks.sort_order, -1) bookmarks_sort_order "
// Don't include retweets on "bookmarked by" searches because it doesn't distinguish which retweet
// version was the "bookmarked" one
where_clauses = append(where_clauses, "retweet_id = 0")
}
// Pagination // Pagination
if c.CursorPosition != CURSOR_START { if c.CursorPosition != CURSOR_START {
where_clauses = append(where_clauses, c.SortOrder.PaginationWhereClause()) where_clauses = append(where_clauses, c.SortOrder.PaginationWhereClause())
@ -525,21 +566,30 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
where_clause = "where " + strings.Join(where_clauses, " and ") where_clause = "where " + strings.Join(where_clauses, " and ")
} }
// The Query:
// 1. Base query:
// a. Include "likes_sort_order" and "bookmarks_sort_order" fields, if they're in the filters
// b. Left join on "likes" table to get whether logged-in user has liked the tweet
// c. Left join on "likes" and "bookmarks" tables, if needed (i.e., if in the filters)
// d. Add 'where', 'order by', and (mildly unnecessary) 'limit' clauses
// 2. Two copies of the base query, one for "tweets" and one for "retweets", joined with "union"
// 3. Actual "limit" clause
q := `select * from ( q := `select * from (
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + `, select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + bookmarks_sort_order_field + `,
0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at, 0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
posted_at chrono, tweets.user_id by_user_id posted_at chrono, tweets.user_id by_user_id
from tweets from tweets
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
left join likes on tweets.id = likes.tweet_id and likes.user_id = ? left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
` + liked_by_filter_join_clause + ` ` + liked_by_filter_join_clause + `
` + bookmarked_by_filter_join_clause + `
` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ? ` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ?
) )
union union
select * from ( select * from (
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + `, select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + bookmarks_sort_order_field + `,
retweets.tweet_id, retweet_id, retweeted_by, retweeted_at, retweets.tweet_id, retweet_id, retweeted_by, retweeted_at,
retweeted_at chrono, retweeted_by by_user_id retweeted_at chrono, retweeted_by by_user_id
from retweets from retweets
@ -547,10 +597,9 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
left join likes on tweets.id = likes.tweet_id and likes.user_id = ? left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
` + liked_by_filter_join_clause + ` ` + liked_by_filter_join_clause + `
` + where_clause + ` ` + bookmarked_by_filter_join_clause + `
` + c.SortOrder.OrderByClause() + ` ` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ?
limit ? )` + c.SortOrder.OrderByClause() + ` limit ?`
) ` + c.SortOrder.OrderByClause() + ` limit ?`
bind_values = append([]interface{}{current_user_id}, bind_values...) bind_values = append([]interface{}{current_user_id}, bind_values...)
bind_values = append(bind_values, c.PageSize) bind_values = append(bind_values, c.PageSize)

View File

@ -260,6 +260,21 @@ create index if not exists index_likes_user_id on likes (user_id);
create index if not exists index_likes_tweet_id on likes (tweet_id); create index if not exists index_likes_tweet_id on likes (tweet_id);
-- Bookmarks
------------

-- Commas added between table constraints: omitting them is only accepted due
-- to a SQLite parser quirk and is invalid standard SQL.
create table bookmarks(rowid integer primary key,
    sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
    user_id integer not null,
    tweet_id integer not null,
    unique(user_id, tweet_id),
    foreign key(tweet_id) references tweets(id),
    foreign key(user_id) references users(id)
);
create index if not exists index_bookmarks_user_id on bookmarks (user_id);
create index if not exists index_bookmarks_tweet_id on bookmarks (tweet_id);
-- Direct Messages (DMs) -- Direct Messages (DMs)
-- --------------------- -- ---------------------

View File

@ -87,4 +87,11 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
panic(fmt.Errorf("Error saving Like: %#v\n %w", l, err)) panic(fmt.Errorf("Error saving Like: %#v\n %w", l, err))
} }
} }
for _, b := range trove.Bookmarks {
err := p.SaveBookmark(b)
if err != nil {
panic(fmt.Errorf("Error saving Bookmark: %#v\n %w", b, err))
}
}
} }

View File

@ -304,6 +304,14 @@ func create_dummy_like() Like {
} }
} }
// create_dummy_bookmark builds a Bookmark linking the stable user to the
// stable tweet, with a fixed sort ID.
func create_dummy_bookmark() Bookmark {
	var ret Bookmark
	ret.TweetID = create_stable_tweet().ID
	ret.UserID = create_stable_user().ID
	ret.SortID = BookmarkSortID(12345)
	return ret
}
func create_stable_chat_room() DMChatRoom { func create_stable_chat_room() DMChatRoom {
id := DMChatRoomID("some chat room ID") id := DMChatRoomID("some chat room ID")

View File

@ -297,6 +297,16 @@ var MIGRATIONS = []string{
foreign key(chat_message_id) references chat_messages(id) foreign key(chat_message_id) references chat_messages(id)
); );
create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`, create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`,
`create table bookmarks(rowid integer primary key,
sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
user_id integer not null,
tweet_id integer not null,
unique(user_id, tweet_id)
foreign key(tweet_id) references tweets(id)
foreign key(user_id) references users(id)
);
create index if not exists index_bookmarks_user_id on bookmarks (user_id);
create index if not exists index_bookmarks_tweet_id on bookmarks (tweet_id);`,
} }
var ENGINE_DATABASE_VERSION = len(MIGRATIONS) var ENGINE_DATABASE_VERSION = len(MIGRATIONS)

View File

@ -74,6 +74,13 @@ type GraphqlFeatures struct {
// Spaces // Spaces
Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"` Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"`
Spaces2022H2SpacesCommunities bool `json:"spaces_2022_h2_spaces_communities,omitempty"` Spaces2022H2SpacesCommunities bool `json:"spaces_2022_h2_spaces_communities,omitempty"`
// Bookmarks
CommunitiesWebEnableTweetCommunityResultsFetch bool `json:"communities_web_enable_tweet_community_results_fetch,omitempty"`
RWebTipjarConsumptionEnabled bool `json:"rweb_tipjar_consumption_enabled,omitempty"`
ArticlesPreviewEnabled bool `json:"articles_preview_enabled,omitempty"`
GraphqlTimelineV2BookmarkTimeline bool `json:"graphql_timeline_v2_bookmark_timeline,omitempty"`
CreatorSubscriptionsQuoteTweetPreviewEnabled bool `json:"creator_subscriptions_quote_tweet_preview_enabled"`
} }
type GraphqlURL struct { type GraphqlURL struct {

View File

@ -599,6 +599,11 @@ type APIV2Response struct {
} `json:"timeline"` } `json:"timeline"`
} `json:"search_timeline"` } `json:"search_timeline"`
} `json:"search_by_raw_query"` } `json:"search_by_raw_query"`
BookmarkTimelineV2 struct {
Timeline struct {
Instructions []APIV2Instruction `json:"instructions"`
} `json:"timeline"`
} `json:"bookmark_timeline_v2"`
} `json:"data"` } `json:"data"`
Errors []struct { Errors []struct {
Message string `json:"message"` Message string `json:"message"`
@ -637,6 +642,12 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
return &instructions[i] return &instructions[i]
} }
} }
instructions = api_response.Data.BookmarkTimelineV2.Timeline.Instructions
for i := range instructions {
if instructions[i].Type == "TimelineAddEntries" {
return &instructions[i]
}
}
panic("No 'TimelineAddEntries' found") panic("No 'TimelineAddEntries' found")
} }
@ -831,6 +842,45 @@ func (r APIV2Response) ToTweetTroveAsLikes() (TweetTrove, error) {
return ret, err return ret, err
} }
// ToTweetTroveAsBookmarks converts the response to a TweetTrove and then
// generates a Bookmark for each non-cursor entry in the main instruction.
// The Bookmarks' UserID is left at its zero value; the caller is expected to
// fill it in (the response itself doesn't identify whose bookmarks these are).
//
// Panics on entry types it doesn't recognize, and on tombstoned entries.
func (r APIV2Response) ToTweetTroveAsBookmarks() (TweetTrove, error) {
	ret, err := r.ToTweetTrove()
	if err != nil {
		return ret, err
	}

	// Post-process tweets as Bookmarks
	for _, entry := range r.GetMainInstruction().Entries {
		// Skip cursors
		if entry.Content.EntryType == "TimelineTimelineCursor" {
			continue
		}
		// Assume it's not a TimelineModule or a Tombstone
		if entry.Content.EntryType != "TimelineTimelineItem" {
			panic(fmt.Sprintf("Unknown Bookmark entry type: %s", entry.Content.EntryType))
		}
		if entry.Content.ItemContent.ItemType == "TimelineTombstone" {
			// Fixed typo in panic message ("Bookmarkd" -> "Bookmarked")
			panic(fmt.Sprintf("Bookmarked tweet is a tombstone: %#v", entry))
		}

		// Generate a "Bookmark" from the entry
		tweet, is_ok := ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result._Result.ID)]
		if !is_ok {
			// For TweetWithVisibilityResults
			tweet, is_ok = ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result.Tweet.ID)]
			if !is_ok {
				log.Warnf("ID: %d", entry.Content.ItemContent.TweetResults.Result._Result.ID)
				log.Warnf("Entry JSON: %s", entry.OriginalJSON)
				panic(ret.Tweets)
			}
		}
		ret.Bookmarks[BookmarkSortID(entry.SortIndex)] = Bookmark{
			SortID:  BookmarkSortID(entry.SortIndex),
			TweetID: tweet.ID,
		}
	}
	// err is necessarily nil here
	return ret, nil
}
type PaginatedQuery interface { type PaginatedQuery interface {
NextPage(api *API, cursor string) (APIV2Response, error) NextPage(api *API, cursor string) (APIV2Response, error)
ToTweetTrove(r APIV2Response) (TweetTrove, error) ToTweetTrove(r APIV2Response) (TweetTrove, error)
@ -1085,6 +1135,75 @@ func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many) return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
} }
// GetBookmarks fetches one page of the logged-in user's Bookmarks timeline
// from the GraphQL endpoint.  `cursor` may be empty for the first page.
//
// Fixed: HTTP errors from do_http are now returned to the caller instead of
// panicking, matching the function's declared error return.
func (api *API) GetBookmarks(cursor string) (APIV2Response, error) {
	url, err := url.Parse(GraphqlURL{
		BaseUrl: "https://twitter.com/i/api/graphql/xLjCVTqYWz8CGSprLU349w/Bookmarks",
		Variables: GraphqlVariables{
			Count:                  20,
			Cursor:                 cursor,
			IncludePromotedContent: false,
		},
		Features: GraphqlFeatures{
			ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled:                 true,
			ResponsiveWebGraphqlTimelineNavigationEnabled:                  true,
			UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled:  true,
			TweetypieUnmentionOptimizationEnabled:                          true,
			ResponsiveWebUcGqlEnabled:                                      true,
			VibeApiEnabled:                                                 true,
			ResponsiveWebEditTweetApiEnabled:                               true,
			GraphqlIsTranslatableRWebTweetIsTranslatableEnabled:            true,
			StandardizedNudgesMisinfo:                                      true,
			InteractiveTextEnabled:                                         true,
			ResponsiveWebEnhanceCardsEnabled:                               true,
			TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
			ResponsiveWebTextConversationsEnabled:                          false,
			VerifiedPhoneLabelEnabled:                                      false,
			CommunitiesWebEnableTweetCommunityResultsFetch:                 true,
			RWebTipjarConsumptionEnabled:                                   true,
			ArticlesPreviewEnabled:                                         true,
			GraphqlTimelineV2BookmarkTimeline:                              true,
			CreatorSubscriptionsQuoteTweetPreviewEnabled:                   false,
		},
	}.String())
	if err != nil {
		// URL is assembled from constants; a parse failure is a programmer bug
		panic(err)
	}

	var response APIV2Response
	err = api.do_http(url.String(), cursor, &response)
	if err != nil {
		return APIV2Response{}, err
	}
	return response, nil
}
// PaginatedBookmarks implements PaginatedQuery for scraping a user's
// Bookmarks feed page by page.
type PaginatedBookmarks struct {
	user_id UserID // the user whose bookmarks are being fetched
}

// NextPage fetches the page of Bookmarks at the given cursor.
func (p PaginatedBookmarks) NextPage(api *API, cursor string) (APIV2Response, error) {
	return api.GetBookmarks(cursor)
}
// ToTweetTrove converts a Bookmarks API response to a TweetTrove, then stamps
// each resulting Bookmark with the paginating user's ID (the API response
// itself doesn't say whose bookmarks these are).
func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
	trove, err := r.ToTweetTroveAsBookmarks()
	if err != nil {
		return TweetTrove{}, err
	}
	// Fill out the bookmarking UserID
	for sort_id, bookmark := range trove.Bookmarks {
		bookmark.UserID = p.user_id
		trove.Bookmarks[sort_id] = bookmark
	}
	return trove, nil
}
// GetBookmarks scrapes the logged-in user's bookmarks (up to `how_many`)
// using the global API session.
func GetBookmarks(how_many int) (TweetTrove, error) {
	return the_api.GetPaginatedQuery(PaginatedBookmarks{the_api.UserID}, how_many)
}
func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) { func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) {
var url string var url string
body_struct := struct { body_struct := struct {

9
pkg/scraper/bookmark.go Normal file
View File

@ -0,0 +1,9 @@
package scraper

// BookmarkSortID is the sort_index Twitter assigns to a bookmark timeline
// entry; feeds order on it descending (larger values come first).
type BookmarkSortID int64

// Bookmark records that a user bookmarked a tweet.  Field tags map onto the
// `bookmarks` table columns.
type Bookmark struct {
	SortID  BookmarkSortID `db:"sort_order"`
	UserID  UserID         `db:"user_id"`
	TweetID TweetID        `db:"tweet_id"`
}

View File

@ -8,11 +8,12 @@ import (
) )
type TweetTrove struct { type TweetTrove struct {
Tweets map[TweetID]Tweet Tweets map[TweetID]Tweet
Users map[UserID]User Users map[UserID]User
Retweets map[TweetID]Retweet Retweets map[TweetID]Retweet
Spaces map[SpaceID]Space Spaces map[SpaceID]Space
Likes map[LikeSortID]Like Likes map[LikeSortID]Like
Bookmarks map[BookmarkSortID]Bookmark
TombstoneUsers []UserHandle TombstoneUsers []UserHandle
} }
@ -24,6 +25,7 @@ func NewTweetTrove() TweetTrove {
ret.Retweets = make(map[TweetID]Retweet) ret.Retweets = make(map[TweetID]Retweet)
ret.Spaces = make(map[SpaceID]Space) ret.Spaces = make(map[SpaceID]Space)
ret.Likes = make(map[LikeSortID]Like) ret.Likes = make(map[LikeSortID]Like)
ret.Bookmarks = make(map[BookmarkSortID]Bookmark)
ret.TombstoneUsers = []UserHandle{} ret.TombstoneUsers = []UserHandle{}
return ret return ret
} }
@ -59,6 +61,9 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
for id, val := range t2.Likes { for id, val := range t2.Likes {
t1.Likes[id] = val t1.Likes[id] = val
} }
for id, val := range t2.Bookmarks {
t1.Bookmarks[id] = val
}
t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...) t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...)
} }