Add scraping and saving of bookmarks
This commit is contained in:
parent
fc08fb0be4
commit
14a7f88cdb
@ -369,6 +369,10 @@ tw unlike_tweet https://twitter.com/elonmusk/status/1589023388676554753
|
|||||||
# TODO: implement deleting a Like
|
# TODO: implement deleting a Like
|
||||||
# test $(sqlite3 twitter.db "select count(*) from likes where tweet_id = 1589023388676554753 and user_id = (select id from users where handle like 'offline_twatter')") = "0"
|
# test $(sqlite3 twitter.db "select count(*) from likes where tweet_id = 1589023388676554753 and user_id = (select id from users where handle like 'offline_twatter')") = "0"
|
||||||
|
|
||||||
|
# Test fetching bookmarks
|
||||||
|
tw get_bookmarks
|
||||||
|
test $(sqlite3 twitter.db "select count(*) from bookmarks") -ge "2"
|
||||||
|
test $(sqlite3 twitter.db "select count(*) from bookmarks where tweet_id = 1762239926437843421") = "1"
|
||||||
|
|
||||||
# Test fetch inbox
|
# Test fetch inbox
|
||||||
test $(sqlite3 twitter.db "select count(*) from chat_rooms") = "0"
|
test $(sqlite3 twitter.db "select count(*) from chat_rooms") = "0"
|
||||||
@ -377,6 +381,7 @@ tw fetch_inbox
|
|||||||
test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1"
|
test $(sqlite3 twitter.db "select count(*) from chat_rooms") -ge "1"
|
||||||
test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5"
|
test $(sqlite3 twitter.db "select count(*) from chat_messages where chat_room_id = '1458284524761075714-1488963321701171204'") -ge "5"
|
||||||
|
|
||||||
|
|
||||||
# Test fetch a DM conversation
|
# Test fetch a DM conversation
|
||||||
tw fetch_dm "1458284524761075714-1488963321701171204"
|
tw fetch_dm "1458284524761075714-1488963321701171204"
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ func main() {
|
|||||||
|
|
||||||
if len(args) < 2 {
|
if len(args) < 2 {
|
||||||
if len(args) == 1 && (args[0] == "list_followed" || args[0] == "webserver" || args[0] == "fetch_timeline" ||
|
if len(args) == 1 && (args[0] == "list_followed" || args[0] == "webserver" || args[0] == "fetch_timeline" ||
|
||||||
args[0] == "fetch_timeline_following_only" || args[0] == "fetch_inbox") {
|
args[0] == "fetch_timeline_following_only" || args[0] == "fetch_inbox" || args[0] == "get_bookmarks") {
|
||||||
// Doesn't need a target, so create a fake second arg
|
// Doesn't need a target, so create a fake second arg
|
||||||
args = append(args, "")
|
args = append(args, "")
|
||||||
} else {
|
} else {
|
||||||
@ -167,6 +167,8 @@ func main() {
|
|||||||
get_followers(target, *how_many)
|
get_followers(target, *how_many)
|
||||||
case "get_followees":
|
case "get_followees":
|
||||||
get_followees(target, *how_many)
|
get_followees(target, *how_many)
|
||||||
|
case "get_bookmarks":
|
||||||
|
get_bookmarks(*how_many)
|
||||||
case "fetch_timeline":
|
case "fetch_timeline":
|
||||||
fetch_timeline(false) // TODO: *how_many
|
fetch_timeline(false) // TODO: *how_many
|
||||||
case "fetch_timeline_following_only":
|
case "fetch_timeline_following_only":
|
||||||
@ -384,7 +386,18 @@ func get_followers(handle string, how_many int) {
|
|||||||
|
|
||||||
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)))
|
happy_exit(fmt.Sprintf("Saved %d followers", len(trove.Users)))
|
||||||
}
|
}
|
||||||
|
func get_bookmarks(how_many int) {
|
||||||
|
trove, err := scraper.GetBookmarks(how_many)
|
||||||
|
if err != nil {
|
||||||
|
die(fmt.Sprintf("Error scraping bookmarks:\n %s", err.Error()), false, -2)
|
||||||
|
}
|
||||||
|
profile.SaveTweetTrove(trove, true)
|
||||||
|
|
||||||
|
happy_exit(fmt.Sprintf(
|
||||||
|
"Saved %d tweets, %d retweets, %d users, and %d bookmarks",
|
||||||
|
len(trove.Tweets), len(trove.Retweets), len(trove.Users), len(trove.Bookmarks)),
|
||||||
|
)
|
||||||
|
}
|
||||||
func fetch_timeline(is_following_only bool) {
|
func fetch_timeline(is_following_only bool) {
|
||||||
trove, err := scraper.GetHomeTimeline("", is_following_only)
|
trove, err := scraper.GetHomeTimeline("", is_following_only)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
42
pkg/persistence/bookmark_queries.go
Normal file
42
pkg/persistence/bookmark_queries.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
package persistence
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (p Profile) SaveBookmark(l scraper.Bookmark) error {
|
||||||
|
_, err := p.DB.NamedExec(`
|
||||||
|
insert into bookmarks (sort_order, user_id, tweet_id)
|
||||||
|
values (:sort_order, :user_id, :tweet_id)
|
||||||
|
on conflict do update set sort_order = max(sort_order, :sort_order)
|
||||||
|
`,
|
||||||
|
l,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error executing SaveBookmark(%#v):\n %w", l, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
|
||||||
|
_, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, l)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", l, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p Profile) GetBookmarkBySortID(id scraper.BookmarkSortID) (scraper.Bookmark, error) {
|
||||||
|
var l scraper.Bookmark
|
||||||
|
err := p.DB.Get(&l, `
|
||||||
|
select sort_order, user_id, tweet_id
|
||||||
|
from bookmarks
|
||||||
|
where sort_order = ?
|
||||||
|
`, id)
|
||||||
|
if err != nil {
|
||||||
|
return l, fmt.Errorf("Error executing GetBookmarkBySortID(%d):\n %w", id, err)
|
||||||
|
}
|
||||||
|
return l, nil
|
||||||
|
}
|
57
pkg/persistence/bookmark_queries_test.go
Normal file
57
pkg/persistence/bookmark_queries_test.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
package persistence_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/go-test/deep"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSaveAndLoadBookmark(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestBookmarksQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
bookmark := create_dummy_bookmark()
|
||||||
|
err := profile.SaveBookmark(bookmark)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Reload the Bookmark
|
||||||
|
new_bookmark, err := profile.GetBookmarkBySortID(bookmark.SortID)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Should come back the same
|
||||||
|
if diff := deep.Equal(bookmark, new_bookmark); diff != nil {
|
||||||
|
t.Error(diff)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test double-saving
|
||||||
|
err = profile.SaveBookmark(bookmark)
|
||||||
|
require.NoError(err)
|
||||||
|
new_bookmark, err = profile.GetBookmarkBySortID(bookmark.SortID)
|
||||||
|
require.NoError(err)
|
||||||
|
if diff := deep.Equal(bookmark, new_bookmark); diff != nil {
|
||||||
|
t.Error(diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteBookmark(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
|
||||||
|
profile_path := "test_profiles/TestBookmarksQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
bookmark := create_dummy_bookmark()
|
||||||
|
err := profile.SaveBookmark(bookmark)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Delete it
|
||||||
|
err = profile.DeleteBookmark(bookmark)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Should be gone
|
||||||
|
_, err = profile.GetBookmarkBySortID(bookmark.SortID)
|
||||||
|
require.Error(err)
|
||||||
|
}
|
@ -18,10 +18,11 @@ const (
|
|||||||
SORT_ORDER_MOST_LIKES
|
SORT_ORDER_MOST_LIKES
|
||||||
SORT_ORDER_MOST_RETWEETS
|
SORT_ORDER_MOST_RETWEETS
|
||||||
SORT_ORDER_LIKED_AT
|
SORT_ORDER_LIKED_AT
|
||||||
|
SORT_ORDER_BOOKMARKED_AT
|
||||||
)
|
)
|
||||||
|
|
||||||
func (o SortOrder) String() string {
|
func (o SortOrder) String() string {
|
||||||
return []string{"newest", "oldest", "most likes", "most retweets", "liked at"}[o]
|
return []string{"newest", "oldest", "most likes", "most retweets", "liked at", "bookmarked at"}[o]
|
||||||
}
|
}
|
||||||
|
|
||||||
func SortOrderFromString(s string) (SortOrder, bool) {
|
func SortOrderFromString(s string) (SortOrder, bool) {
|
||||||
@ -31,6 +32,7 @@ func SortOrderFromString(s string) (SortOrder, bool) {
|
|||||||
"most likes": SORT_ORDER_MOST_LIKES,
|
"most likes": SORT_ORDER_MOST_LIKES,
|
||||||
"most retweets": SORT_ORDER_MOST_RETWEETS,
|
"most retweets": SORT_ORDER_MOST_RETWEETS,
|
||||||
"liked at": SORT_ORDER_LIKED_AT,
|
"liked at": SORT_ORDER_LIKED_AT,
|
||||||
|
"bookmarked at": SORT_ORDER_BOOKMARKED_AT,
|
||||||
}[s]
|
}[s]
|
||||||
return result, is_ok // Have to store as temporary variable b/c otherwise it interprets it as single-value and compile fails
|
return result, is_ok // Have to store as temporary variable b/c otherwise it interprets it as single-value and compile fails
|
||||||
}
|
}
|
||||||
@ -47,6 +49,8 @@ func (o SortOrder) OrderByClause() string {
|
|||||||
return "order by num_retweets desc"
|
return "order by num_retweets desc"
|
||||||
case SORT_ORDER_LIKED_AT:
|
case SORT_ORDER_LIKED_AT:
|
||||||
return "order by likes_sort_order desc"
|
return "order by likes_sort_order desc"
|
||||||
|
case SORT_ORDER_BOOKMARKED_AT:
|
||||||
|
return "order by bookmarks_sort_order desc"
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
||||||
}
|
}
|
||||||
@ -63,6 +67,8 @@ func (o SortOrder) PaginationWhereClause() string {
|
|||||||
return "num_retweets < ?"
|
return "num_retweets < ?"
|
||||||
case SORT_ORDER_LIKED_AT:
|
case SORT_ORDER_LIKED_AT:
|
||||||
return "likes_sort_order < ?"
|
return "likes_sort_order < ?"
|
||||||
|
case SORT_ORDER_BOOKMARKED_AT:
|
||||||
|
return "bookmarks_sort_order < ?"
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
||||||
}
|
}
|
||||||
@ -79,6 +85,8 @@ func (o SortOrder) NextCursorValue(r CursorResult) int {
|
|||||||
return r.NumRetweets
|
return r.NumRetweets
|
||||||
case SORT_ORDER_LIKED_AT:
|
case SORT_ORDER_LIKED_AT:
|
||||||
return r.LikeSortOrder
|
return r.LikeSortOrder
|
||||||
|
case SORT_ORDER_BOOKMARKED_AT:
|
||||||
|
return r.BookmarkSortOrder
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
||||||
}
|
}
|
||||||
@ -127,6 +135,7 @@ type CursorResult struct {
|
|||||||
scraper.Retweet
|
scraper.Retweet
|
||||||
Chrono int `db:"chrono"`
|
Chrono int `db:"chrono"`
|
||||||
LikeSortOrder int `db:"likes_sort_order"`
|
LikeSortOrder int `db:"likes_sort_order"`
|
||||||
|
BookmarkSortOrder int `db:"bookmarks_sort_order"`
|
||||||
ByUserID scraper.UserID `db:"by_user_id"`
|
ByUserID scraper.UserID `db:"by_user_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +152,7 @@ type Cursor struct {
|
|||||||
ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user
|
ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user
|
||||||
ToUserHandles []scraper.UserHandle // In reply to these users
|
ToUserHandles []scraper.UserHandle // In reply to these users
|
||||||
LikedByUserHandle scraper.UserHandle // Liked by this user
|
LikedByUserHandle scraper.UserHandle // Liked by this user
|
||||||
|
BookmarkedByUserHandle scraper.UserHandle // Bookmarked by this user
|
||||||
ListID scraper.ListID // Either tweeted or retweeted by users from this List
|
ListID scraper.ListID // Either tweeted or retweeted by users from this List
|
||||||
FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user
|
FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user
|
||||||
SinceTimestamp scraper.Timestamp
|
SinceTimestamp scraper.Timestamp
|
||||||
@ -255,6 +265,22 @@ func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate a cursor for a User's Bookmarks
|
||||||
|
func NewUserFeedBookmarksCursor(h scraper.UserHandle) Cursor {
|
||||||
|
return Cursor{
|
||||||
|
Keywords: []string{},
|
||||||
|
ToUserHandles: []scraper.UserHandle{},
|
||||||
|
SinceTimestamp: scraper.TimestampFromUnix(0),
|
||||||
|
UntilTimestamp: scraper.TimestampFromUnix(0),
|
||||||
|
CursorPosition: CURSOR_START,
|
||||||
|
CursorValue: 0,
|
||||||
|
SortOrder: SORT_ORDER_BOOKMARKED_AT,
|
||||||
|
PageSize: 50,
|
||||||
|
|
||||||
|
BookmarkedByUserHandle: h,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func NewCursorFromSearchQuery(q string) (Cursor, error) {
|
func NewCursorFromSearchQuery(q string) (Cursor, error) {
|
||||||
ret := NewCursor()
|
ret := NewCursor()
|
||||||
is_in_quotes := false
|
is_in_quotes := false
|
||||||
@ -325,6 +351,8 @@ func (c *Cursor) apply_token(token string) error {
|
|||||||
c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
|
c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
|
||||||
case "liked_by":
|
case "liked_by":
|
||||||
c.LikedByUserHandle = scraper.UserHandle(parts[1])
|
c.LikedByUserHandle = scraper.UserHandle(parts[1])
|
||||||
|
case "bookmarked_by":
|
||||||
|
c.BookmarkedByUserHandle = scraper.UserHandle(parts[1])
|
||||||
case "followed_by":
|
case "followed_by":
|
||||||
c.FollowedByUserHandle = scraper.UserHandle(parts[1])
|
c.FollowedByUserHandle = scraper.UserHandle(parts[1])
|
||||||
case "list":
|
case "list":
|
||||||
@ -513,6 +541,19 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
|
|||||||
where_clauses = append(where_clauses, "retweet_id = 0")
|
where_clauses = append(where_clauses, "retweet_id = 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bookmarked_by_filter_join_clause := ""
|
||||||
|
bookmarks_sort_order_field := ""
|
||||||
|
if c.BookmarkedByUserHandle != "" {
|
||||||
|
bookmarked_by_filter_join_clause = " join bookmarks filter_bookmarks on tweets.id = filter_bookmarks.tweet_id "
|
||||||
|
where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users where handle like ?) ")
|
||||||
|
bind_values = append(bind_values, c.BookmarkedByUserHandle)
|
||||||
|
bookmarks_sort_order_field = ", coalesce(filter_bookmarks.sort_order, -1) bookmarks_sort_order "
|
||||||
|
|
||||||
|
// Don't include retweets on "bookmarked by" searches because it doesn't distinguish which retweet
|
||||||
|
// version was the "bookmarked" one
|
||||||
|
where_clauses = append(where_clauses, "retweet_id = 0")
|
||||||
|
}
|
||||||
|
|
||||||
// Pagination
|
// Pagination
|
||||||
if c.CursorPosition != CURSOR_START {
|
if c.CursorPosition != CURSOR_START {
|
||||||
where_clauses = append(where_clauses, c.SortOrder.PaginationWhereClause())
|
where_clauses = append(where_clauses, c.SortOrder.PaginationWhereClause())
|
||||||
@ -525,21 +566,30 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
|
|||||||
where_clause = "where " + strings.Join(where_clauses, " and ")
|
where_clause = "where " + strings.Join(where_clauses, " and ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The Query:
|
||||||
|
// 1. Base query:
|
||||||
|
// a. Include "likes_sort_order" and "bookmarks_sort_order" fields, if they're in the filters
|
||||||
|
// b. Left join on "likes" table to get whether logged-in user has liked the tweet
|
||||||
|
// c. Join on "likes" and "bookmarks" tables, if needed (i.e., if in the filters)
|
||||||
|
// d. Add 'where', 'order by', and (mildly unnecessary) 'limit' clauses
|
||||||
|
// 2. Two copies of the base query, one for "tweets" and one for "retweets", joined with "union"
|
||||||
|
// 3. Actual "limit" clause
|
||||||
q := `select * from (
|
q := `select * from (
|
||||||
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + `,
|
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + bookmarks_sort_order_field + `,
|
||||||
0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
|
0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
|
||||||
posted_at chrono, tweets.user_id by_user_id
|
posted_at chrono, tweets.user_id by_user_id
|
||||||
from tweets
|
from tweets
|
||||||
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
|
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
|
||||||
left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
|
left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
|
||||||
` + liked_by_filter_join_clause + `
|
` + liked_by_filter_join_clause + `
|
||||||
|
` + bookmarked_by_filter_join_clause + `
|
||||||
` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ?
|
` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ?
|
||||||
)
|
)
|
||||||
|
|
||||||
union
|
union
|
||||||
|
|
||||||
select * from (
|
select * from (
|
||||||
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + `,
|
select ` + TWEETS_ALL_SQL_FIELDS + likes_sort_order_field + bookmarks_sort_order_field + `,
|
||||||
retweets.tweet_id, retweet_id, retweeted_by, retweeted_at,
|
retweets.tweet_id, retweet_id, retweeted_by, retweeted_at,
|
||||||
retweeted_at chrono, retweeted_by by_user_id
|
retweeted_at chrono, retweeted_by by_user_id
|
||||||
from retweets
|
from retweets
|
||||||
@ -547,9 +597,8 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
|
|||||||
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
|
left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
|
||||||
left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
|
left join likes on tweets.id = likes.tweet_id and likes.user_id = ?
|
||||||
` + liked_by_filter_join_clause + `
|
` + liked_by_filter_join_clause + `
|
||||||
` + where_clause + `
|
` + bookmarked_by_filter_join_clause + `
|
||||||
` + c.SortOrder.OrderByClause() + `
|
` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ?
|
||||||
limit ?
|
|
||||||
)` + c.SortOrder.OrderByClause() + ` limit ?`
|
)` + c.SortOrder.OrderByClause() + ` limit ?`
|
||||||
|
|
||||||
bind_values = append([]interface{}{current_user_id}, bind_values...)
|
bind_values = append([]interface{}{current_user_id}, bind_values...)
|
||||||
|
@ -260,6 +260,21 @@ create index if not exists index_likes_user_id on likes (user_id);
|
|||||||
create index if not exists index_likes_tweet_id on likes (tweet_id);
|
create index if not exists index_likes_tweet_id on likes (tweet_id);
|
||||||
|
|
||||||
|
|
||||||
|
-- Bookmarks
|
||||||
|
------------
|
||||||
|
|
||||||
|
create table bookmarks(rowid integer primary key,
|
||||||
|
sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
|
||||||
|
user_id integer not null,
|
||||||
|
tweet_id integer not null,
|
||||||
|
unique(user_id, tweet_id)
|
||||||
|
foreign key(tweet_id) references tweets(id)
|
||||||
|
foreign key(user_id) references users(id)
|
||||||
|
);
|
||||||
|
create index if not exists index_bookmarks_user_id on bookmarks (user_id);
|
||||||
|
create index if not exists index_bookmarks_tweet_id on bookmarks (tweet_id);
|
||||||
|
|
||||||
|
|
||||||
-- Direct Messages (DMs)
|
-- Direct Messages (DMs)
|
||||||
-- ---------------------
|
-- ---------------------
|
||||||
|
|
||||||
|
@ -87,4 +87,11 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool) {
|
|||||||
panic(fmt.Errorf("Error saving Like: %#v\n %w", l, err))
|
panic(fmt.Errorf("Error saving Like: %#v\n %w", l, err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, b := range trove.Bookmarks {
|
||||||
|
err := p.SaveBookmark(b)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("Error saving Bookmark: %#v\n %w", b, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -304,6 +304,14 @@ func create_dummy_like() Like {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func create_dummy_bookmark() Bookmark {
|
||||||
|
return Bookmark{
|
||||||
|
TweetID: create_stable_tweet().ID,
|
||||||
|
UserID: create_stable_user().ID,
|
||||||
|
SortID: BookmarkSortID(12345),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func create_stable_chat_room() DMChatRoom {
|
func create_stable_chat_room() DMChatRoom {
|
||||||
id := DMChatRoomID("some chat room ID")
|
id := DMChatRoomID("some chat room ID")
|
||||||
|
|
||||||
|
@ -297,6 +297,16 @@ var MIGRATIONS = []string{
|
|||||||
foreign key(chat_message_id) references chat_messages(id)
|
foreign key(chat_message_id) references chat_messages(id)
|
||||||
);
|
);
|
||||||
create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`,
|
create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`,
|
||||||
|
`create table bookmarks(rowid integer primary key,
|
||||||
|
sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
|
||||||
|
user_id integer not null,
|
||||||
|
tweet_id integer not null,
|
||||||
|
unique(user_id, tweet_id)
|
||||||
|
foreign key(tweet_id) references tweets(id)
|
||||||
|
foreign key(user_id) references users(id)
|
||||||
|
);
|
||||||
|
create index if not exists index_bookmarks_user_id on bookmarks (user_id);
|
||||||
|
create index if not exists index_bookmarks_tweet_id on bookmarks (tweet_id);`,
|
||||||
}
|
}
|
||||||
var ENGINE_DATABASE_VERSION = len(MIGRATIONS)
|
var ENGINE_DATABASE_VERSION = len(MIGRATIONS)
|
||||||
|
|
||||||
|
@ -74,6 +74,13 @@ type GraphqlFeatures struct {
|
|||||||
// Spaces
|
// Spaces
|
||||||
Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"`
|
Spaces2022H2Clipping bool `json:"spaces_2022_h2_clipping,omitempty"`
|
||||||
Spaces2022H2SpacesCommunities bool `json:"spaces_2022_h2_spaces_communities,omitempty"`
|
Spaces2022H2SpacesCommunities bool `json:"spaces_2022_h2_spaces_communities,omitempty"`
|
||||||
|
|
||||||
|
// Bookmarks
|
||||||
|
CommunitiesWebEnableTweetCommunityResultsFetch bool `json:"communities_web_enable_tweet_community_results_fetch,omitempty"`
|
||||||
|
RWebTipjarConsumptionEnabled bool `json:"rweb_tipjar_consumption_enabled,omitempty"`
|
||||||
|
ArticlesPreviewEnabled bool `json:"articles_preview_enabled,omitempty"`
|
||||||
|
GraphqlTimelineV2BookmarkTimeline bool `json:"graphql_timeline_v2_bookmark_timeline,omitempty"`
|
||||||
|
CreatorSubscriptionsQuoteTweetPreviewEnabled bool `json:"creator_subscriptions_quote_tweet_preview_enabled"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GraphqlURL struct {
|
type GraphqlURL struct {
|
||||||
|
@ -599,6 +599,11 @@ type APIV2Response struct {
|
|||||||
} `json:"timeline"`
|
} `json:"timeline"`
|
||||||
} `json:"search_timeline"`
|
} `json:"search_timeline"`
|
||||||
} `json:"search_by_raw_query"`
|
} `json:"search_by_raw_query"`
|
||||||
|
BookmarkTimelineV2 struct {
|
||||||
|
Timeline struct {
|
||||||
|
Instructions []APIV2Instruction `json:"instructions"`
|
||||||
|
} `json:"timeline"`
|
||||||
|
} `json:"bookmark_timeline_v2"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
Errors []struct {
|
Errors []struct {
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
@ -637,6 +642,12 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
|
|||||||
return &instructions[i]
|
return &instructions[i]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
instructions = api_response.Data.BookmarkTimelineV2.Timeline.Instructions
|
||||||
|
for i := range instructions {
|
||||||
|
if instructions[i].Type == "TimelineAddEntries" {
|
||||||
|
return &instructions[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
panic("No 'TimelineAddEntries' found")
|
panic("No 'TimelineAddEntries' found")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -831,6 +842,45 @@ func (r APIV2Response) ToTweetTroveAsLikes() (TweetTrove, error) {
|
|||||||
return ret, err
|
return ret, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r APIV2Response) ToTweetTroveAsBookmarks() (TweetTrove, error) {
|
||||||
|
ret, err := r.ToTweetTrove()
|
||||||
|
if err != nil {
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Post-process tweets as Bookmarks
|
||||||
|
for _, entry := range r.GetMainInstruction().Entries {
|
||||||
|
// Skip cursors
|
||||||
|
if entry.Content.EntryType == "TimelineTimelineCursor" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Assume it's not a TimelineModule or a Tombstone
|
||||||
|
if entry.Content.EntryType != "TimelineTimelineItem" {
|
||||||
|
panic(fmt.Sprintf("Unknown Bookmark entry type: %s", entry.Content.EntryType))
|
||||||
|
}
|
||||||
|
if entry.Content.ItemContent.ItemType == "TimelineTombstone" {
|
||||||
|
panic(fmt.Sprintf("Bookmarkd tweet is a tombstone: %#v", entry))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate a "Bookmark" from the entry
|
||||||
|
tweet, is_ok := ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result._Result.ID)]
|
||||||
|
if !is_ok {
|
||||||
|
// For TweetWithVisibilityResults
|
||||||
|
tweet, is_ok = ret.Tweets[TweetID(entry.Content.ItemContent.TweetResults.Result.Tweet.ID)]
|
||||||
|
if !is_ok {
|
||||||
|
log.Warnf("ID: %d", entry.Content.ItemContent.TweetResults.Result._Result.ID)
|
||||||
|
log.Warnf("Entry JSON: %s", entry.OriginalJSON)
|
||||||
|
panic(ret.Tweets)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret.Bookmarks[BookmarkSortID(entry.SortIndex)] = Bookmark{
|
||||||
|
SortID: BookmarkSortID(entry.SortIndex),
|
||||||
|
TweetID: tweet.ID,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
|
|
||||||
type PaginatedQuery interface {
|
type PaginatedQuery interface {
|
||||||
NextPage(api *API, cursor string) (APIV2Response, error)
|
NextPage(api *API, cursor string) (APIV2Response, error)
|
||||||
ToTweetTrove(r APIV2Response) (TweetTrove, error)
|
ToTweetTrove(r APIV2Response) (TweetTrove, error)
|
||||||
@ -1085,6 +1135,75 @@ func GetUserLikes(user_id UserID, how_many int) (TweetTrove, error) {
|
|||||||
return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
|
return the_api.GetPaginatedQuery(PaginatedUserLikes{user_id}, how_many)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (api *API) GetBookmarks(cursor string) (APIV2Response, error) {
|
||||||
|
url, err := url.Parse(GraphqlURL{
|
||||||
|
BaseUrl: "https://twitter.com/i/api/graphql/xLjCVTqYWz8CGSprLU349w/Bookmarks",
|
||||||
|
Variables: GraphqlVariables{
|
||||||
|
Count: 20,
|
||||||
|
Cursor: cursor,
|
||||||
|
IncludePromotedContent: false,
|
||||||
|
},
|
||||||
|
Features: GraphqlFeatures{
|
||||||
|
ResponsiveWebTwitterBlueVerifiedBadgeIsEnabled: true,
|
||||||
|
ResponsiveWebGraphqlTimelineNavigationEnabled: true,
|
||||||
|
UnifiedCardsAdMetadataContainerDynamicCardContentQueryEnabled: true,
|
||||||
|
TweetypieUnmentionOptimizationEnabled: true,
|
||||||
|
ResponsiveWebUcGqlEnabled: true,
|
||||||
|
VibeApiEnabled: true,
|
||||||
|
ResponsiveWebEditTweetApiEnabled: true,
|
||||||
|
GraphqlIsTranslatableRWebTweetIsTranslatableEnabled: true,
|
||||||
|
StandardizedNudgesMisinfo: true,
|
||||||
|
InteractiveTextEnabled: true,
|
||||||
|
ResponsiveWebEnhanceCardsEnabled: true,
|
||||||
|
TweetWithVisibilityResultsPreferGqlLimitedActionsPolicyEnabled: false,
|
||||||
|
ResponsiveWebTextConversationsEnabled: false,
|
||||||
|
VerifiedPhoneLabelEnabled: false,
|
||||||
|
|
||||||
|
CommunitiesWebEnableTweetCommunityResultsFetch: true,
|
||||||
|
RWebTipjarConsumptionEnabled: true,
|
||||||
|
ArticlesPreviewEnabled: true,
|
||||||
|
GraphqlTimelineV2BookmarkTimeline: true,
|
||||||
|
CreatorSubscriptionsQuoteTweetPreviewEnabled: false,
|
||||||
|
},
|
||||||
|
}.String())
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var response APIV2Response
|
||||||
|
err = api.do_http(url.String(), cursor, &response)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return response, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type PaginatedBookmarks struct {
|
||||||
|
user_id UserID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p PaginatedBookmarks) NextPage(api *API, cursor string) (APIV2Response, error) {
|
||||||
|
return api.GetBookmarks(cursor)
|
||||||
|
}
|
||||||
|
func (p PaginatedBookmarks) ToTweetTrove(r APIV2Response) (TweetTrove, error) {
|
||||||
|
ret, err := r.ToTweetTroveAsBookmarks()
|
||||||
|
if err != nil {
|
||||||
|
return TweetTrove{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill out the bookmarking UserID
|
||||||
|
for i := range ret.Bookmarks {
|
||||||
|
l := ret.Bookmarks[i]
|
||||||
|
l.UserID = p.user_id
|
||||||
|
ret.Bookmarks[i] = l
|
||||||
|
}
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBookmarks(how_many int) (TweetTrove, error) {
|
||||||
|
return the_api.GetPaginatedQuery(PaginatedBookmarks{the_api.UserID}, how_many)
|
||||||
|
}
|
||||||
|
|
||||||
func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) {
|
func (api *API) GetHomeTimeline(cursor string, is_following_only bool) (TweetTrove, error) {
|
||||||
var url string
|
var url string
|
||||||
body_struct := struct {
|
body_struct := struct {
|
||||||
|
9
pkg/scraper/bookmark.go
Normal file
9
pkg/scraper/bookmark.go
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
type BookmarkSortID int64
|
||||||
|
|
||||||
|
type Bookmark struct {
|
||||||
|
SortID BookmarkSortID `db:"sort_order"`
|
||||||
|
UserID UserID `db:"user_id"`
|
||||||
|
TweetID TweetID `db:"tweet_id"`
|
||||||
|
}
|
@ -13,6 +13,7 @@ type TweetTrove struct {
|
|||||||
Retweets map[TweetID]Retweet
|
Retweets map[TweetID]Retweet
|
||||||
Spaces map[SpaceID]Space
|
Spaces map[SpaceID]Space
|
||||||
Likes map[LikeSortID]Like
|
Likes map[LikeSortID]Like
|
||||||
|
Bookmarks map[BookmarkSortID]Bookmark
|
||||||
|
|
||||||
TombstoneUsers []UserHandle
|
TombstoneUsers []UserHandle
|
||||||
}
|
}
|
||||||
@ -24,6 +25,7 @@ func NewTweetTrove() TweetTrove {
|
|||||||
ret.Retweets = make(map[TweetID]Retweet)
|
ret.Retweets = make(map[TweetID]Retweet)
|
||||||
ret.Spaces = make(map[SpaceID]Space)
|
ret.Spaces = make(map[SpaceID]Space)
|
||||||
ret.Likes = make(map[LikeSortID]Like)
|
ret.Likes = make(map[LikeSortID]Like)
|
||||||
|
ret.Bookmarks = make(map[BookmarkSortID]Bookmark)
|
||||||
ret.TombstoneUsers = []UserHandle{}
|
ret.TombstoneUsers = []UserHandle{}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
@ -59,6 +61,9 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
|
|||||||
for id, val := range t2.Likes {
|
for id, val := range t2.Likes {
|
||||||
t1.Likes[id] = val
|
t1.Likes[id] = val
|
||||||
}
|
}
|
||||||
|
for id, val := range t2.Bookmarks {
|
||||||
|
t1.Bookmarks[id] = val
|
||||||
|
}
|
||||||
|
|
||||||
t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...)
|
t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user