Improve expressiveness of Cursor querying, and implement Offline Timeline

Author: Alessio, 2023-08-11 20:56:30 -03:00
Parent: c3d52348fc
Commit: 5568a86651
3 changed files with 128 additions and 15 deletions

View File

@@ -68,7 +68,7 @@ func (p Profile) fill_content(trove *TweetTrove) {
         is_content_downloaded, is_followed
     from users
     where id in (` + strings.Repeat("?,", len(user_ids)-1) + `?)`
-    fmt.Printf("%s\n", userquery)
+    // fmt.Printf("%s\n", userquery)
     err := p.DB.Select(&users, userquery, user_ids...)
     if err != nil {
         panic(err)
@@ -82,7 +82,7 @@ func (p Profile) fill_content(trove *TweetTrove) {
     var images []Image
     imgquery := `
        select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id in (` + in_clause + `)`
-    fmt.Printf("%s\n", imgquery) // TODO: SQL logger
+    // fmt.Printf("%s\n", imgquery) // TODO: SQL logger
     err := p.DB.Select(&images, imgquery, tweet_ids...)
     if err != nil {
         panic(err)
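
The commented-out prints above stand in for the "TODO: SQL logger" note. One possible shape for that logger, assuming p.DB is an sqlx-style handle (which the Select(dest, query, args...) calls above suggest); a sketch only, not part of this commit:

    // Hypothetical central logging wrapper; requires importing "log".
    // Callers would use p.selectLogged(...) instead of p.DB.Select(...).
    func (p Profile) selectLogged(dest interface{}, query string, args ...interface{}) error {
        log.Printf("SQL: %s -- binds: %v", query, args)
        return p.DB.Select(dest, query, args...)
    }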

View File

@@ -73,10 +73,23 @@ const (
     CURSOR_END
 )
 
+// Whether to require, exclude, or be indifferent to a type of content
+type Filter int
+
+const (
+    // Filter is not used
+    NONE Filter = iota
+    // All results must match the filter
+    REQUIRE
+    // Results must not match the filter
+    EXCLUDE
+)
+
 type CursorResult struct {
     scraper.Tweet
     scraper.Retweet
     Chrono   int            `db:"chrono"`
+    ByUserID scraper.UserID `db:"by_user_id"`
 }
 
 type Cursor struct {
@@ -92,10 +105,13 @@ type Cursor struct {
     RetweetedByUserHandle scraper.UserHandle
     SinceTimestamp        scraper.Timestamp
     UntilTimestamp        scraper.Timestamp
-    FilterLinks           bool
-    FilterImages          bool
-    FilterVideos          bool
-    FilterPolls           bool
+    FilterLinks           Filter
+    FilterImages          Filter
+    FilterVideos          Filter
+    FilterPolls           Filter
+    FilterReplies         Filter
+    FilterRetweets        Filter
+    FilterOfflineFollowed Filter
 }
 
 func NewCursor() Cursor {
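
The Filter type is a tri-state replacement for the old booleans: a bool could only say "require this content" or "don't care", while the enum adds "exclude". A self-contained sketch of the semantics (illustrative only; the commit itself applies them as SQL clauses, shown further down):

    package main

    import "fmt"

    type Filter int

    const (
        NONE    Filter = iota // filter is not used
        REQUIRE               // all results must match
        EXCLUDE               // results must not match
    )

    // keep reports whether an item with property `has` survives filter f.
    func keep(f Filter, has bool) bool {
        switch f {
        case REQUIRE:
            return has
        case EXCLUDE:
            return !has
        }
        return true // NONE: indifferent
    }

    func main() {
        fmt.Println(keep(REQUIRE, false)) // false: lacks the required content
        fmt.Println(keep(EXCLUDE, false)) // true: lacks the excluded content
        fmt.Println(keep(NONE, false))    // true: filter unused
    }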
@@ -108,6 +124,23 @@ func NewCursor() Cursor {
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
         PageSize:       50,
+
+        FilterRetweets: EXCLUDE,
+    }
+}
+
+func NewTimelineCursor() Cursor {
+    return Cursor{
+        Keywords:       []string{},
+        ToUserHandles:  []scraper.UserHandle{},
+        SinceTimestamp: scraper.TimestampFromUnix(0),
+        UntilTimestamp: scraper.TimestampFromUnix(0),
+        CursorPosition: CURSOR_START,
+        CursorValue:    0,
+        SortOrder:      SORT_ORDER_NEWEST,
+        PageSize:       50,
+
+        FilterOfflineFollowed: REQUIRE,
     }
 }
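
Pieced together from the constructor above and the test at the bottom of this commit, driving the offline timeline looks roughly like this (a sketch; the profile path is an assumption, and error handling is reduced to panics):

    // NewTimelineCursor starts at CURSOR_START with FilterOfflineFollowed ==
    // REQUIRE; each page's CursorBottom is fed back in to fetch the next page.
    profile, err := persistence.LoadProfile("sample_data/profile") // assumed path
    if err != nil {
        panic(err)
    }
    c := persistence.NewTimelineCursor()
    for {
        feed, err := profile.NextPage(c)
        if err != nil {
            panic(err)
        }
        for _, item := range feed.Items {
            fmt.Printf("%#v\n", item) // render the tweet or retweet
        }
        if feed.CursorBottom.CursorPosition == persistence.CURSOR_END {
            break
        }
        c = feed.CursorBottom
    }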
@@ -130,8 +163,10 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
         where_clauses = append(where_clauses, "reply_mentions like ?")
         bind_values = append(bind_values, fmt.Sprintf("%%%s%%", to_user))
     }
-    where_clauses = append(where_clauses, "retweeted_by = coalesce((select id from users where handle like ?), 0)")
-    bind_values = append(bind_values, c.RetweetedByUserHandle)
+    if c.RetweetedByUserHandle != "" {
+        where_clauses = append(where_clauses, "retweeted_by = (select id from users where handle like ?)")
+        bind_values = append(bind_values, c.RetweetedByUserHandle)
+    }
 
     // Since and until timestamps
     if c.SinceTimestamp.Unix() != 0 {
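
The new guard is also a behavior fix, not just a style change: the old clause was appended unconditionally, and with an empty handle the coalesce(..., 0) fallback degenerated to retweeted_by = 0, silently restricting results to non-retweets. That default now lives explicitly in NewCursor's FilterRetweets: EXCLUDE, and the retweet-search test below sets FilterRetweets = REQUIRE instead. A minimal sketch of the before/after clause generation (illustrative, not the commit's code):

    // retweetClause mimics the new behavior: no handle, no clause. The old code
    // always emitted
    //   retweeted_by = coalesce((select id from users where handle like ''), 0)
    // for an empty handle, which reduces to retweeted_by = 0 ("not a retweet").
    func retweetClause(handle string) (clause string, binds []interface{}) {
        if handle == "" {
            return "", nil
        }
        return "retweeted_by = (select id from users where handle like ?)", []interface{}{handle}
    }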
@@ -144,17 +179,49 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
     }
 
     // Media filters
-    if c.FilterLinks {
+    switch c.FilterLinks {
+    case REQUIRE:
         where_clauses = append(where_clauses, "exists (select 1 from urls where urls.tweet_id = tweets.id)")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "not exists (select 1 from urls where urls.tweet_id = tweets.id)")
     }
-    if c.FilterImages {
+    switch c.FilterImages {
+    case REQUIRE:
         where_clauses = append(where_clauses, "exists (select 1 from images where images.tweet_id = tweets.id)")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "not exists (select 1 from images where images.tweet_id = tweets.id)")
     }
-    if c.FilterVideos {
+    switch c.FilterVideos {
+    case REQUIRE:
         where_clauses = append(where_clauses, "exists (select 1 from videos where videos.tweet_id = tweets.id)")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "not exists (select 1 from videos where videos.tweet_id = tweets.id)")
     }
-    if c.FilterPolls {
+    switch c.FilterPolls {
+    case REQUIRE:
         where_clauses = append(where_clauses, "exists (select 1 from polls where polls.tweet_id = tweets.id)")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "not exists (select 1 from polls where polls.tweet_id = tweets.id)")
+    }
+
+    // Filter by lists (e.g., offline-followed)
+    switch c.FilterOfflineFollowed {
+    case REQUIRE:
+        where_clauses = append(where_clauses, "by_user_id in (select id from users where is_followed = 1)")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "by_user_id not in (select id from users where is_followed = 1)")
+    }
+
+    switch c.FilterReplies {
+    case REQUIRE:
+        where_clauses = append(where_clauses, "in_reply_to_id != 0")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "in_reply_to_id = 0")
+    }
+
+    switch c.FilterRetweets {
+    case REQUIRE:
+        where_clauses = append(where_clauses, "retweet_id != 0")
+    case EXCLUDE:
+        where_clauses = append(where_clauses, "retweet_id = 0")
+    }
 
     // Pagination
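
All four media filters share the same exists / not exists shape, so the switch blocks could collapse into one helper; a possible refactor sketch (not in the commit):

    // addContentFilter appends an exists/not-exists subquery against a media
    // table keyed by tweet_id; NONE appends nothing, matching the switches above.
    func addContentFilter(where_clauses []string, f Filter, table string) []string {
        clause := fmt.Sprintf("exists (select 1 from %s where %s.tweet_id = tweets.id)", table, table)
        switch f {
        case REQUIRE:
            return append(where_clauses, clause)
        case EXCLUDE:
            return append(where_clauses, "not "+clause)
        }
        return where_clauses
    }

    // Usage, replacing the four switches:
    //   where_clauses = addContentFilter(where_clauses, c.FilterLinks, "urls")
    //   where_clauses = addContentFilter(where_clauses, c.FilterImages, "images")
    //   where_clauses = addContentFilter(where_clauses, c.FilterVideos, "videos")
    //   where_clauses = addContentFilter(where_clauses, c.FilterPolls, "polls")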
@@ -170,7 +237,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
             is_expandable,
             is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
             0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
-            posted_at chrono
+            posted_at chrono, user_id by_user_id
         from tweets
         left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
         ` + where_clause + `
@@ -182,7 +249,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
             is_expandable,
             is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
             tweet_id, retweet_id, retweeted_by, retweeted_at,
-            retweeted_at chrono
+            retweeted_at chrono, retweeted_by by_user_id
         from retweets
         left join tweets on retweets.tweet_id = tweets.id
         left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
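
Both arms of the union now project a by_user_id column (the author for regular tweets, the retweeter for retweets), which is what lets FilterOfflineFollowed apply uniformly to both; chrono likewise gives the arms a shared sort key. A condensed paraphrase of the assembled query (the real arms select the full tweet column list):

    // where_clause is spliced into both arms, which is why bind_values is
    // doubled below before PageSize is appended for the limit.
    const queryShape = `
        select ..., posted_at chrono, user_id by_user_id
          from tweets <where_clause>
        union
        select ..., retweeted_at chrono, retweeted_by by_user_id
          from retweets left join tweets on retweets.tweet_id = tweets.id <where_clause>
        order by chrono <asc or desc, per SortOrder> limit ?`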
@@ -193,6 +260,8 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
     bind_values = append(bind_values, bind_values...)
     bind_values = append(bind_values, c.PageSize)
 
+    // fmt.Printf("Query: %s\n", q)
+    // fmt.Printf("Bind values: %#v\n", bind_values)
+
     // Run the query
     var results []CursorResult
     err := p.DB.Select(&results, q, bind_values...)
@@ -203,6 +272,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
     // Assemble the feed
     ret := NewFeed()
     for _, val := range results {
+        // fmt.Printf("\tResult: %#v\n", val)
         ret.Tweets[val.Tweet.ID] = val.Tweet
         if val.Retweet.RetweetID != 0 {
             ret.Retweets[val.Retweet.RetweetID] = val.Retweet

View File

@@ -62,6 +62,7 @@ func TestCursorSearchWithRetweets(t *testing.T) {
     c := persistence.NewCursor()
     c.PageSize = 3
     c.RetweetedByUserHandle = "cernovich"
+    c.FilterRetweets = persistence.REQUIRE
     c.SortOrder = persistence.SORT_ORDER_OLDEST
 
     feed, err := profile.NextPage(c)
@@ -87,3 +88,45 @@ func TestCursorSearchWithRetweets(t *testing.T) {
     next_cursor = feed.CursorBottom
     assert.Equal(next_cursor.CursorPosition, persistence.CURSOR_END)
 }
+
+// Offline Following Timeline
+func TestTimeline(t *testing.T) {
+    require := require.New(t)
+    assert := assert.New(t)
+    profile, err := persistence.LoadProfile("../../sample_data/profile")
+    require.NoError(err)
+
+    c := persistence.NewTimelineCursor()
+    c.PageSize = 5
+
+    feed, err := profile.NextPage(c)
+    require.NoError(err)
+    assert.Len(feed.Items, 5)
+    assert.Len(feed.Retweets, 4)
+    assert.Equal(feed.Items[0].RetweetID, TweetID(1490135787144237058))
+    assert.Equal(feed.Items[1].RetweetID, TweetID(1490135787124232222))
+    assert.Equal(feed.Items[2].RetweetID, TweetID(1490119308692766723))
+    assert.Equal(feed.Items[3].RetweetID, TweetID(1490100255987171332))
+    assert.Equal(feed.Items[4].TweetID, TweetID(1453461248142495744))
+
+    next_cursor := feed.CursorBottom
+    assert.Equal(next_cursor.CursorPosition, persistence.CURSOR_MIDDLE)
+    assert.Equal(next_cursor.SortOrder, c.SortOrder)
+    assert.Equal(next_cursor.Keywords, c.Keywords)
+    assert.Equal(next_cursor.PageSize, c.PageSize)
+    assert.Equal(next_cursor.CursorValue, 1635367140)
+
+    next_cursor.CursorValue = 1631935323 // Scroll down a bit, kind of randomly
+    feed, err = profile.NextPage(next_cursor)
+    require.NoError(err)
+    assert.Len(feed.Items, 5)
+    assert.Len(feed.Retweets, 1)
+    assert.Equal(feed.Items[0].TweetID, TweetID(1439027915404939265))
+    assert.Equal(feed.Items[1].TweetID, TweetID(1413773185296650241))
+    assert.Equal(feed.Items[2].TweetID, TweetID(1413664406995566593))
+    assert.Equal(feed.Items[3].RetweetID, TweetID(144919526660333333))
+    assert.Equal(feed.Items[4].TweetID, TweetID(1413658466795737091))
+}