Implement search query parsing (not yet hooked up to search bar)
This commit is contained in:
parent
addcf0ea52
commit
fa33199489
@ -1,8 +1,10 @@
|
|||||||
package persistence
|
package persistence
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
@ -114,6 +116,7 @@ type Cursor struct {
|
|||||||
FilterImages Filter
|
FilterImages Filter
|
||||||
FilterVideos Filter
|
FilterVideos Filter
|
||||||
FilterPolls Filter
|
FilterPolls Filter
|
||||||
|
FilterSpaces Filter
|
||||||
FilterReplies Filter
|
FilterReplies Filter
|
||||||
FilterRetweets Filter
|
FilterRetweets Filter
|
||||||
FilterOfflineFollowed Filter
|
FilterOfflineFollowed Filter
|
||||||
@ -167,6 +170,97 @@ func NewUserFeedCursor(h scraper.UserHandle) Cursor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewCursorFromSearchQuery(q string) (Cursor, error) {
|
||||||
|
ret := NewCursor()
|
||||||
|
is_in_quotes := false
|
||||||
|
current_token := ""
|
||||||
|
|
||||||
|
for _, char := range q {
|
||||||
|
if char == ' ' && !is_in_quotes {
|
||||||
|
// Token is finished
|
||||||
|
if current_token == "" {
|
||||||
|
// Ignore empty tokens
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Add the completed token
|
||||||
|
if err := ret.apply_token(current_token); err != nil {
|
||||||
|
return Cursor{}, err
|
||||||
|
}
|
||||||
|
current_token = ""
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if char == '"' {
|
||||||
|
if is_in_quotes {
|
||||||
|
is_in_quotes = false
|
||||||
|
if err := ret.apply_token(current_token); err != nil {
|
||||||
|
return Cursor{}, err
|
||||||
|
}
|
||||||
|
current_token = ""
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
is_in_quotes = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// current_token = fmt.Sprintf("%s%s", current_token, char)
|
||||||
|
current_token += string(char)
|
||||||
|
}
|
||||||
|
|
||||||
|
// End of query string is reached
|
||||||
|
if is_in_quotes {
|
||||||
|
return Cursor{}, ErrUnmatchedQuotes
|
||||||
|
}
|
||||||
|
if current_token != "" {
|
||||||
|
if err := ret.apply_token(current_token); err != nil {
|
||||||
|
return Cursor{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sentinel errors for search-query parsing.  ErrUnmatchedQuotes wraps
// ErrInvalidQuery, so callers can match either one with errors.Is.
var (
	ErrInvalidQuery    = errors.New("invalid search query")
	ErrUnmatchedQuotes = fmt.Errorf("%w (unmatched quotes)", ErrInvalidQuery)
)
|
||||||
|
|
||||||
|
func (c *Cursor) apply_token(token string) error {
|
||||||
|
parts := strings.Split(token, ":")
|
||||||
|
if len(parts) < 2 {
|
||||||
|
c.Keywords = append(c.Keywords, token)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var err error
|
||||||
|
switch parts[0] {
|
||||||
|
case "from":
|
||||||
|
c.FromUserHandle = scraper.UserHandle(parts[1])
|
||||||
|
case "to":
|
||||||
|
c.ToUserHandles = append(c.ToUserHandles, scraper.UserHandle(parts[1]))
|
||||||
|
case "retweeted_by":
|
||||||
|
c.RetweetedByUserHandle = scraper.UserHandle(parts[1])
|
||||||
|
case "since":
|
||||||
|
c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
||||||
|
case "until":
|
||||||
|
c.UntilTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
||||||
|
case "filter":
|
||||||
|
switch parts[1] {
|
||||||
|
case "links":
|
||||||
|
c.FilterLinks = REQUIRE
|
||||||
|
case "images":
|
||||||
|
c.FilterImages = REQUIRE
|
||||||
|
case "videos":
|
||||||
|
c.FilterVideos = REQUIRE
|
||||||
|
case "polls":
|
||||||
|
c.FilterPolls = REQUIRE
|
||||||
|
case "spaces":
|
||||||
|
c.FilterSpaces = REQUIRE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("query token %q: %w", token, ErrInvalidQuery)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (p Profile) NextPage(c Cursor) (Feed, error) {
|
func (p Profile) NextPage(c Cursor) (Feed, error) {
|
||||||
where_clauses := []string{}
|
where_clauses := []string{}
|
||||||
bind_values := []interface{}{}
|
bind_values := []interface{}{}
|
||||||
@ -230,6 +324,12 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
|
|||||||
case EXCLUDE:
|
case EXCLUDE:
|
||||||
where_clauses = append(where_clauses, "not exists (select 1 from polls where polls.tweet_id = tweets.id)")
|
where_clauses = append(where_clauses, "not exists (select 1 from polls where polls.tweet_id = tweets.id)")
|
||||||
}
|
}
|
||||||
|
switch c.FilterSpaces {
|
||||||
|
case REQUIRE:
|
||||||
|
where_clauses = append(where_clauses, "space_id != 0")
|
||||||
|
case EXCLUDE:
|
||||||
|
where_clauses = append(where_clauses, "space_id = 0")
|
||||||
|
}
|
||||||
|
|
||||||
// Filter by lists (e.g., offline-followed)
|
// Filter by lists (e.g., offline-followed)
|
||||||
switch c.FilterOfflineFollowed {
|
switch c.FilterOfflineFollowed {
|
||||||
|
110
pkg/persistence/compound_ssf_queries_parse_test.go
Normal file
110
pkg/persistence/compound_ssf_queries_parse_test.go
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
package persistence_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTokenizeSearchString(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("think")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(c.Keywords, 1)
|
||||||
|
assert.Equal(c.Keywords[0], "think")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenizeSearchStringMultipleWords(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("think tank")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(c.Keywords, 2)
|
||||||
|
assert.Equal(c.Keywords[0], "think")
|
||||||
|
assert.Equal(c.Keywords[1], "tank")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenizeSearchStringQuotedTokens(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("\"think tank\"")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(c.Keywords, 1)
|
||||||
|
assert.Equal("think tank", c.Keywords[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenizeSearchStringFromUser(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("from:cernovich retweeted_by:blehbleh to:somebody")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(c.Keywords, 0)
|
||||||
|
assert.Equal(c.FromUserHandle, UserHandle("cernovich"))
|
||||||
|
assert.Equal(c.RetweetedByUserHandle, UserHandle("blehbleh"))
|
||||||
|
assert.Equal(c.ToUserHandles, []UserHandle{"somebody"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestComplexSearchString(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("stupid \"think tank\" from:kashi")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(c.Keywords, 2)
|
||||||
|
assert.Equal("stupid", c.Keywords[0])
|
||||||
|
assert.Equal("think tank", c.Keywords[1])
|
||||||
|
assert.Equal(c.FromUserHandle, UserHandle("kashi"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchStringBadQuotes(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
_, err := persistence.NewCursorFromSearchQuery("asdf \"fjk")
|
||||||
|
require.Error(err)
|
||||||
|
assert.ErrorIs(err, persistence.ErrUnmatchedQuotes)
|
||||||
|
assert.ErrorIs(err, persistence.ErrInvalidQuery)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchWithDates(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("since:2020-01-01 until:2020-05-01")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Equal(c.SinceTimestamp.Time, time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||||
|
assert.Equal(c.UntilTimestamp.Time, time.Date(2020, 5, 1, 0, 0, 0, 0, time.UTC))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchWithInvalidDates(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
_, err := persistence.NewCursorFromSearchQuery("since:fawejk")
|
||||||
|
require.Error(err)
|
||||||
|
assert.ErrorIs(err, persistence.ErrInvalidQuery)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchContentFilters(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
c, err := persistence.NewCursorFromSearchQuery("filter:links filter:videos filter:images filter:polls filter:spaces")
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Equal(c.FilterLinks, persistence.REQUIRE)
|
||||||
|
assert.Equal(c.FilterVideos, persistence.REQUIRE)
|
||||||
|
assert.Equal(c.FilterImages, persistence.REQUIRE)
|
||||||
|
assert.Equal(c.FilterPolls, persistence.REQUIRE)
|
||||||
|
assert.Equal(c.FilterSpaces, persistence.REQUIRE)
|
||||||
|
}
|
@ -3,6 +3,8 @@ package persistence_test
|
|||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
@ -130,3 +132,137 @@ func TestTimeline(t *testing.T) {
|
|||||||
assert.Equal(feed.Items[3].RetweetID, TweetID(144919526660333333))
|
assert.Equal(feed.Items[3].RetweetID, TweetID(144919526660333333))
|
||||||
assert.Equal(feed.Items[4].TweetID, TweetID(1413658466795737091))
|
assert.Equal(feed.Items[4].TweetID, TweetID(1413658466795737091))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestKeywordSearch(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||||
|
require.NoError(err)
|
||||||
|
c := persistence.NewCursor()
|
||||||
|
|
||||||
|
// Multiple words without quotes
|
||||||
|
c.Keywords = []string{"who", "are"}
|
||||||
|
feed, err := profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.True(len(feed.Items) > 1)
|
||||||
|
|
||||||
|
// Add quotes
|
||||||
|
c.Keywords = []string{"who are"}
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 1)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1261483383483293700))
|
||||||
|
|
||||||
|
// With gibberish (no matches)
|
||||||
|
c.Keywords = []string{"fasdfjkafsldfjsff"}
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchReplyingToUser(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||||
|
require.NoError(err)
|
||||||
|
c := persistence.NewCursor()
|
||||||
|
|
||||||
|
// Replying to a user
|
||||||
|
c.ToUserHandles = []UserHandle{"spacex"}
|
||||||
|
feed, err := profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 2)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1428951883058753537))
|
||||||
|
assert.Equal(feed.Items[1].TweetID, TweetID(1428939163961790466))
|
||||||
|
|
||||||
|
// Replying to two users
|
||||||
|
c.ToUserHandles = []UserHandle{"spacex", "covfefeanon"}
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 1)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1428939163961790466))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchDateFilters(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||||
|
require.NoError(err)
|
||||||
|
c := persistence.NewCursor()
|
||||||
|
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||||
|
|
||||||
|
// Since timestamp
|
||||||
|
c.SinceTimestamp.Time = time.Date(2021, 10, 1, 0, 0, 0, 0, time.UTC)
|
||||||
|
c.FromUserHandle = UserHandle("cernovich")
|
||||||
|
feed, err := profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 1)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1453461248142495744))
|
||||||
|
|
||||||
|
// Until timestamp
|
||||||
|
c.SinceTimestamp = TimestampFromUnix(0)
|
||||||
|
c.UntilTimestamp.Time = time.Date(2021, 10, 1, 0, 0, 0, 0, time.UTC)
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 3)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1439027915404939265))
|
||||||
|
assert.Equal(feed.Items[1].TweetID, TweetID(1439068749336748043))
|
||||||
|
assert.Equal(feed.Items[2].TweetID, TweetID(1439067163508150272))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSearchMediaFilters(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Links
|
||||||
|
c := persistence.NewCursor()
|
||||||
|
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||||
|
c.FilterLinks = persistence.REQUIRE
|
||||||
|
feed, err := profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 2)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1438642143170646017))
|
||||||
|
assert.Equal(feed.Items[1].TweetID, TweetID(1413665734866186243))
|
||||||
|
|
||||||
|
// Images
|
||||||
|
c = persistence.NewCursor()
|
||||||
|
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||||
|
c.FilterImages = persistence.REQUIRE
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 2)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1261483383483293700))
|
||||||
|
assert.Equal(feed.Items[1].TweetID, TweetID(1426669666928414720))
|
||||||
|
|
||||||
|
// Videos
|
||||||
|
c = persistence.NewCursor()
|
||||||
|
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||||
|
c.FilterVideos = persistence.REQUIRE
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 2)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1426619468327882761))
|
||||||
|
assert.Equal(feed.Items[1].TweetID, TweetID(1453461248142495744))
|
||||||
|
|
||||||
|
// Polls
|
||||||
|
c = persistence.NewCursor()
|
||||||
|
c.FilterPolls = persistence.REQUIRE
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 1)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1465534109573390348))
|
||||||
|
|
||||||
|
// Spaces
|
||||||
|
c = persistence.NewCursor()
|
||||||
|
c.FilterSpaces = persistence.REQUIRE
|
||||||
|
feed, err = profile.NextPage(c)
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(feed.Items, 1)
|
||||||
|
assert.Equal(feed.Items[0].TweetID, TweetID(1624833173514293249))
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user