From fd0c625de5bccd6464cb8dfe7fa22dcb8570cce8 Mon Sep 17 00:00:00 2001 From: Alessio Date: Thu, 17 Aug 2023 18:21:22 -0300 Subject: [PATCH] Optimize Timeline query by splitting it into sub-queries so it can use indexes --- pkg/persistence/compound_ssf_queries.go | 11 ++++++++--- pkg/persistence/schema.sql | 3 ++- pkg/persistence/versions.go | 2 ++ pkg/scraper/api_types_spaces.go | 1 - 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/persistence/compound_ssf_queries.go b/pkg/persistence/compound_ssf_queries.go index 57bbf42..66cb825 100644 --- a/pkg/persistence/compound_ssf_queries.go +++ b/pkg/persistence/compound_ssf_queries.go @@ -259,7 +259,8 @@ func (p Profile) NextPage(c Cursor) (Feed, error) { where_clause := "where " + strings.Join(where_clauses, " and ") - q := `select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, + q := `select * from ( + select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type, is_expandable, is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at, @@ -267,10 +268,12 @@ func (p Profile) NextPage(c Cursor) (Feed, error) { posted_at chrono, user_id by_user_id from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid - ` + where_clause + ` + ` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` limit ? + ) union + select * from ( select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type, is_expandable, @@ -282,8 +285,10 @@ func (p Profile) NextPage(c Cursor) (Feed, error) { left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid ` + where_clause + ` ` + c.SortOrder.OrderByClause() + ` - limit ?` + limit ? + ) ` + c.SortOrder.OrderByClause() + ` limit ?` + bind_values = append(bind_values, c.PageSize) bind_values = append(bind_values, bind_values...) bind_values = append(bind_values, c.PageSize) diff --git a/pkg/persistence/schema.sql b/pkg/persistence/schema.sql index 540cb3f..2e0592e 100644 --- a/pkg/persistence/schema.sql +++ b/pkg/persistence/schema.sql @@ -64,7 +64,7 @@ create table tweets (rowid integer primary key, ); create index if not exists index_tweets_in_reply_to_id on tweets (in_reply_to_id); create index if not exists index_tweets_user_id on tweets (user_id); - +create index if not exists index_tweets_posted_at on tweets (posted_at); create table retweets(rowid integer primary key, retweet_id integer not null unique, @@ -74,6 +74,7 @@ create table retweets(rowid integer primary key, foreign key(tweet_id) references tweets(id) foreign key(retweeted_by) references users(id) ); +create index if not exists index_retweets_retweeted_at on retweets (retweeted_at); create table urls (rowid integer primary key, tweet_id integer not null, diff --git a/pkg/persistence/versions.go b/pkg/persistence/versions.go index 97bc480..bc2a3ad 100644 --- a/pkg/persistence/versions.go +++ b/pkg/persistence/versions.go @@ -116,6 +116,8 @@ var MIGRATIONS = []string{ foreign key(user_id) references users(id) foreign key(tweet_id) references tweets(id) );`, + `create index if not exists index_tweets_posted_at on tweets (posted_at); + create index if not exists index_retweets_retweeted_at on retweets (retweeted_at)`, } var ENGINE_DATABASE_VERSION = len(MIGRATIONS) diff --git a/pkg/scraper/api_types_spaces.go b/pkg/scraper/api_types_spaces.go index a3404bd..2c015ed 100644 --- a/pkg/scraper/api_types_spaces.go +++ b/pkg/scraper/api_types_spaces.go @@ -93,7 +93,6 @@ func (r SpaceResponse) ToTweetTrove() TweetTrove { } func (api API) GetSpace(id SpaceID) (SpaceResponse, error) { - // TODO: break up this URL into params so it's readable url, err := url.Parse(GraphqlURL{ BaseUrl: "https://twitter.com/i/api/graphql/Ha9BKBF0uAz9d4-lz0jnYA/AudioSpaceById", Variables: GraphqlVariables{