Improve expressiveness of Cursor querying, and implement Offline Timeline

commit 5568a86651
parent c3d52348fc
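
In short: the boolean media filters on Cursor become a three-state Filter (NONE / REQUIRE / EXCLUDE), three new filters are added (replies, retweets, offline-followed), and NewTimelineCursor() constructs a cursor that requires offline-followed authors — an offline home timeline. A minimal sketch of the resulting API follows; the import path and profile path are illustrative, not part of this commit:

package main

import (
	"fmt"

	"offline_twitter/persistence" // illustrative import path
)

func main() {
	// Load a profile directory (path is illustrative).
	profile, err := persistence.LoadProfile("sample_data/profile")
	if err != nil {
		panic(err)
	}

	c := persistence.NewTimelineCursor()  // REQUIREs offline-followed authors
	c.FilterImages = persistence.REQUIRE  // keep only tweets with images
	c.FilterReplies = persistence.EXCLUDE // hide replies

	feed, err := profile.NextPage(c)
	if err != nil {
		panic(err)
	}
	fmt.Printf("fetched %d feed items\n", len(feed.Items))
}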
@@ -68,7 +68,7 @@ func (p Profile) fill_content(trove *TweetTrove) {
            is_content_downloaded, is_followed
        from users
        where id in (` + strings.Repeat("?,", len(user_ids)-1) + `?)`
-   fmt.Printf("%s\n", userquery)
+   // fmt.Printf("%s\n", userquery)
    err := p.DB.Select(&users, userquery, user_ids...)
    if err != nil {
        panic(err)
@@ -82,7 +82,7 @@ func (p Profile) fill_content(trove *TweetTrove) {
    var images []Image
    imgquery := `
        select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id in (` + in_clause + `)`
-   fmt.Printf("%s\n", imgquery) // TODO: SQL logger
+   // fmt.Printf("%s\n", imgquery) // TODO: SQL logger
    err := p.DB.Select(&images, imgquery, tweet_ids...)
    if err != nil {
        panic(err)
@@ -73,10 +73,23 @@ const (
    CURSOR_END
 )

+// Whether to require, exclude, or be indifferent to a type of content
+type Filter int
+
+const (
+   // Filter is not used
+   NONE Filter = iota
+   // All results must match the filter
+   REQUIRE
+   // Results must not match the filter
+   EXCLUDE
+)
+
 type CursorResult struct {
    scraper.Tweet
    scraper.Retweet
    Chrono   int            `db:"chrono"`
+   ByUserID scraper.UserID `db:"by_user_id"`
 }

 type Cursor struct {
@@ -92,10 +105,13 @@ type Cursor struct {
    RetweetedByUserHandle scraper.UserHandle
    SinceTimestamp        scraper.Timestamp
    UntilTimestamp        scraper.Timestamp
-   FilterLinks           bool
-   FilterImages          bool
-   FilterVideos          bool
-   FilterPolls           bool
+   FilterLinks           Filter
+   FilterImages          Filter
+   FilterVideos          Filter
+   FilterPolls           Filter
+   FilterReplies         Filter
+   FilterRetweets        Filter
+   FilterOfflineFollowed Filter
 }

 func NewCursor() Cursor {
@@ -108,6 +124,23 @@ func NewCursor() Cursor {
    CursorValue:    0,
    SortOrder:      SORT_ORDER_NEWEST,
    PageSize:       50,
+
+   FilterRetweets: EXCLUDE,
+   }
+}
+
+func NewTimelineCursor() Cursor {
+   return Cursor{
+       Keywords:       []string{},
+       ToUserHandles:  []scraper.UserHandle{},
+       SinceTimestamp: scraper.TimestampFromUnix(0),
+       UntilTimestamp: scraper.TimestampFromUnix(0),
+       CursorPosition: CURSOR_START,
+       CursorValue:    0,
+       SortOrder:      SORT_ORDER_NEWEST,
+       PageSize:       50,
+
+       FilterOfflineFollowed: REQUIRE,
    }
 }

@@ -130,8 +163,10 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
        where_clauses = append(where_clauses, "reply_mentions like ?")
        bind_values = append(bind_values, fmt.Sprintf("%%%s%%", to_user))
    }
-   where_clauses = append(where_clauses, "retweeted_by = coalesce((select id from users where handle like ?), 0)")
-   bind_values = append(bind_values, c.RetweetedByUserHandle)
+   if c.RetweetedByUserHandle != "" {
+       where_clauses = append(where_clauses, "retweeted_by = (select id from users where handle like ?)")
+       bind_values = append(bind_values, c.RetweetedByUserHandle)
+   }

    // Since and until timestamps
    if c.SinceTimestamp.Unix() != 0 {
@@ -144,17 +179,49 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
    }

    // Media filters
-   if c.FilterLinks {
+   switch c.FilterLinks {
+   case REQUIRE:
        where_clauses = append(where_clauses, "exists (select 1 from urls where urls.tweet_id = tweets.id)")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "not exists (select 1 from urls where urls.tweet_id = tweets.id)")
    }
-   if c.FilterImages {
+   switch c.FilterImages {
+   case REQUIRE:
        where_clauses = append(where_clauses, "exists (select 1 from images where images.tweet_id = tweets.id)")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "not exists (select 1 from images where images.tweet_id = tweets.id)")
    }
-   if c.FilterVideos {
+   switch c.FilterVideos {
+   case REQUIRE:
        where_clauses = append(where_clauses, "exists (select 1 from videos where videos.tweet_id = tweets.id)")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "not exists (select 1 from videos where videos.tweet_id = tweets.id)")
    }
-   if c.FilterPolls {
+   switch c.FilterPolls {
+   case REQUIRE:
        where_clauses = append(where_clauses, "exists (select 1 from polls where polls.tweet_id = tweets.id)")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "not exists (select 1 from polls where polls.tweet_id = tweets.id)")
+   }
+
+   // Filter by lists (e.g., offline-followed)
+   switch c.FilterOfflineFollowed {
+   case REQUIRE:
+       where_clauses = append(where_clauses, "by_user_id in (select id from users where is_followed = 1)")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "by_user_id not in (select id from users where is_followed = 1)")
+   }
+   switch c.FilterReplies {
+   case REQUIRE:
+       where_clauses = append(where_clauses, "in_reply_to_id != 0")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "in_reply_to_id = 0")
+   }
+   switch c.FilterRetweets {
+   case REQUIRE:
+       where_clauses = append(where_clauses, "retweet_id != 0")
+   case EXCLUDE:
+       where_clauses = append(where_clauses, "retweet_id = 0")
    }

    // Pagination
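(For instance, a cursor with FilterVideos = REQUIRE and FilterRetweets = EXCLUDE would contribute the clauses "exists (select 1 from videos where videos.tweet_id = tweets.id)" and "retweet_id = 0", and-ed into the where-clause alongside any keyword and timestamp conditions; the clause strings are taken verbatim from the switches above.)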
@@ -170,7 +237,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
            is_expandable,
            is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
            0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
-           posted_at chrono
+           posted_at chrono, user_id by_user_id
        from tweets
        left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
    ` + where_clause + `
@@ -182,7 +249,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
            is_expandable,
            is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
            tweet_id, retweet_id, retweeted_by, retweeted_at,
-           retweeted_at chrono
+           retweeted_at chrono, retweeted_by by_user_id
        from retweets
        left join tweets on retweets.tweet_id = tweets.id
        left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
@@ -193,6 +260,8 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
    bind_values = append(bind_values, bind_values...)
    bind_values = append(bind_values, c.PageSize)

+   // fmt.Printf("Query: %s\n", q)
+   // fmt.Printf("Bind values: %#v\n", bind_values)
    // Run the query
    var results []CursorResult
    err := p.DB.Select(&results, q, bind_values...)
@@ -203,6 +272,7 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
    // Assemble the feed
    ret := NewFeed()
    for _, val := range results {
+       // fmt.Printf("\tResult: %#v\n", val)
        ret.Tweets[val.Tweet.ID] = val.Tweet
        if val.Retweet.RetweetID != 0 {
            ret.Retweets[val.Retweet.RetweetID] = val.Retweet
@@ -62,6 +62,7 @@ func TestCursorSearchWithRetweets(t *testing.T) {
    c := persistence.NewCursor()
    c.PageSize = 3
    c.RetweetedByUserHandle = "cernovich"
+   c.FilterRetweets = persistence.REQUIRE
    c.SortOrder = persistence.SORT_ORDER_OLDEST

    feed, err := profile.NextPage(c)
@@ -87,3 +88,45 @@ func TestCursorSearchWithRetweets(t *testing.T) {
    next_cursor = feed.CursorBottom
    assert.Equal(next_cursor.CursorPosition, persistence.CURSOR_END)
 }
+
+// Offline Following Timeline
+func TestTimeline(t *testing.T) {
+   require := require.New(t)
+   assert := assert.New(t)
+
+   profile, err := persistence.LoadProfile("../../sample_data/profile")
+   require.NoError(err)
+
+   c := persistence.NewTimelineCursor()
+   c.PageSize = 5
+
+   feed, err := profile.NextPage(c)
+   require.NoError(err)
+
+   assert.Len(feed.Items, 5)
+   assert.Len(feed.Retweets, 4)
+   assert.Equal(feed.Items[0].RetweetID, TweetID(1490135787144237058))
+   assert.Equal(feed.Items[1].RetweetID, TweetID(1490135787124232222))
+   assert.Equal(feed.Items[2].RetweetID, TweetID(1490119308692766723))
+   assert.Equal(feed.Items[3].RetweetID, TweetID(1490100255987171332))
+   assert.Equal(feed.Items[4].TweetID, TweetID(1453461248142495744))
+
+   next_cursor := feed.CursorBottom
+   assert.Equal(next_cursor.CursorPosition, persistence.CURSOR_MIDDLE)
+   assert.Equal(next_cursor.SortOrder, c.SortOrder)
+   assert.Equal(next_cursor.Keywords, c.Keywords)
+   assert.Equal(next_cursor.PageSize, c.PageSize)
+   assert.Equal(next_cursor.CursorValue, 1635367140)
+
+   next_cursor.CursorValue = 1631935323 // Scroll down a bit, kind of randomly
+   feed, err = profile.NextPage(next_cursor)
+   require.NoError(err)
+
+   assert.Len(feed.Items, 5)
+   assert.Len(feed.Retweets, 1)
+   assert.Equal(feed.Items[0].TweetID, TweetID(1439027915404939265))
+   assert.Equal(feed.Items[1].TweetID, TweetID(1413773185296650241))
+   assert.Equal(feed.Items[2].TweetID, TweetID(1413664406995566593))
+   assert.Equal(feed.Items[3].RetweetID, TweetID(144919526660333333))
+   assert.Equal(feed.Items[4].TweetID, TweetID(1413658466795737091))
+}
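
As a quick illustration of the new defaults (not part of this commit): a fresh timeline cursor requires offline-followed authors but, unlike NewCursor(), leaves retweets visible (FilterRetweets stays NONE), which is why TestTimeline above sees retweets in the feed. The import path is illustrative:

package persistence_test

import (
	"fmt"

	"offline_twitter/persistence" // illustrative import path
)

func ExampleNewTimelineCursor() {
	c := persistence.NewTimelineCursor()
	fmt.Println(c.FilterOfflineFollowed == persistence.REQUIRE) // timeline shows followed users only
	fmt.Println(c.FilterRetweets == persistence.NONE)           // retweets are neither required nor excluded
	// Output:
	// true
	// true
}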