Implement search query parsing (not yet hooked up to search bar)
This commit is contained in:
parent
addcf0ea52
commit
fa33199489
@ -1,8 +1,10 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
@ -114,6 +116,7 @@ type Cursor struct {
|
||||
FilterImages Filter
|
||||
FilterVideos Filter
|
||||
FilterPolls Filter
|
||||
FilterSpaces Filter
|
||||
FilterReplies Filter
|
||||
FilterRetweets Filter
|
||||
FilterOfflineFollowed Filter
|
||||
@ -167,6 +170,97 @@ func NewUserFeedCursor(h scraper.UserHandle) Cursor {
|
||||
}
|
||||
}
|
||||
|
||||
func NewCursorFromSearchQuery(q string) (Cursor, error) {
|
||||
ret := NewCursor()
|
||||
is_in_quotes := false
|
||||
current_token := ""
|
||||
|
||||
for _, char := range q {
|
||||
if char == ' ' && !is_in_quotes {
|
||||
// Token is finished
|
||||
if current_token == "" {
|
||||
// Ignore empty tokens
|
||||
continue
|
||||
}
|
||||
// Add the completed token
|
||||
if err := ret.apply_token(current_token); err != nil {
|
||||
return Cursor{}, err
|
||||
}
|
||||
current_token = ""
|
||||
continue
|
||||
}
|
||||
|
||||
if char == '"' {
|
||||
if is_in_quotes {
|
||||
is_in_quotes = false
|
||||
if err := ret.apply_token(current_token); err != nil {
|
||||
return Cursor{}, err
|
||||
}
|
||||
current_token = ""
|
||||
continue
|
||||
} else {
|
||||
is_in_quotes = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// current_token = fmt.Sprintf("%s%s", current_token, char)
|
||||
current_token += string(char)
|
||||
}
|
||||
|
||||
// End of query string is reached
|
||||
if is_in_quotes {
|
||||
return Cursor{}, ErrUnmatchedQuotes
|
||||
}
|
||||
if current_token != "" {
|
||||
if err := ret.apply_token(current_token); err != nil {
|
||||
return Cursor{}, err
|
||||
}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// Errors produced while parsing a search query.
var (
	// ErrInvalidQuery is the base error for every search query that cannot be parsed.
	ErrInvalidQuery = errors.New("invalid search query")

	// ErrUnmatchedQuotes wraps ErrInvalidQuery; it is returned when a query ends while a
	// double quote is still open.
	ErrUnmatchedQuotes = fmt.Errorf("%w (unmatched quotes)", ErrInvalidQuery)
)
|
||||
|
||||
func (c *Cursor) apply_token(token string) error {
|
||||
parts := strings.Split(token, ":")
|
||||
if len(parts) < 2 {
|
||||
c.Keywords = append(c.Keywords, token)
|
||||
return nil
|
||||
}
|
||||
var err error
|
||||
switch parts[0] {
|
||||
case "from":
|
||||
c.FromUserHandle = scraper.UserHandle(parts[1])
|
||||
case "to":
|
||||
c.ToUserHandles = append(c.ToUserHandles, scraper.UserHandle(parts[1]))
|
||||
case "retweeted_by":
|
||||
c.RetweetedByUserHandle = scraper.UserHandle(parts[1])
|
||||
case "since":
|
||||
c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
||||
case "until":
|
||||
c.UntilTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
||||
case "filter":
|
||||
switch parts[1] {
|
||||
case "links":
|
||||
c.FilterLinks = REQUIRE
|
||||
case "images":
|
||||
c.FilterImages = REQUIRE
|
||||
case "videos":
|
||||
c.FilterVideos = REQUIRE
|
||||
case "polls":
|
||||
c.FilterPolls = REQUIRE
|
||||
case "spaces":
|
||||
c.FilterSpaces = REQUIRE
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("query token %q: %w", token, ErrInvalidQuery)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p Profile) NextPage(c Cursor) (Feed, error) {
|
||||
where_clauses := []string{}
|
||||
bind_values := []interface{}{}
|
||||
@ -230,6 +324,12 @@ func (p Profile) NextPage(c Cursor) (Feed, error) {
|
||||
case EXCLUDE:
|
||||
where_clauses = append(where_clauses, "not exists (select 1 from polls where polls.tweet_id = tweets.id)")
|
||||
}
|
||||
switch c.FilterSpaces {
|
||||
case REQUIRE:
|
||||
where_clauses = append(where_clauses, "space_id != 0")
|
||||
case EXCLUDE:
|
||||
where_clauses = append(where_clauses, "space_id = 0")
|
||||
}
|
||||
|
||||
// Filter by lists (e.g., offline-followed)
|
||||
switch c.FilterOfflineFollowed {
|
||||
|
110
pkg/persistence/compound_ssf_queries_parse_test.go
Normal file
110
pkg/persistence/compound_ssf_queries_parse_test.go
Normal file
@ -0,0 +1,110 @@
|
||||
package persistence_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
|
||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
|
||||
func TestTokenizeSearchString(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("think")
|
||||
require.NoError(err)
|
||||
assert.Len(c.Keywords, 1)
|
||||
assert.Equal(c.Keywords[0], "think")
|
||||
}
|
||||
|
||||
func TestTokenizeSearchStringMultipleWords(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("think tank")
|
||||
require.NoError(err)
|
||||
assert.Len(c.Keywords, 2)
|
||||
assert.Equal(c.Keywords[0], "think")
|
||||
assert.Equal(c.Keywords[1], "tank")
|
||||
}
|
||||
|
||||
func TestTokenizeSearchStringQuotedTokens(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("\"think tank\"")
|
||||
require.NoError(err)
|
||||
assert.Len(c.Keywords, 1)
|
||||
assert.Equal("think tank", c.Keywords[0])
|
||||
}
|
||||
|
||||
func TestTokenizeSearchStringFromUser(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("from:cernovich retweeted_by:blehbleh to:somebody")
|
||||
require.NoError(err)
|
||||
assert.Len(c.Keywords, 0)
|
||||
assert.Equal(c.FromUserHandle, UserHandle("cernovich"))
|
||||
assert.Equal(c.RetweetedByUserHandle, UserHandle("blehbleh"))
|
||||
assert.Equal(c.ToUserHandles, []UserHandle{"somebody"})
|
||||
}
|
||||
|
||||
func TestComplexSearchString(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("stupid \"think tank\" from:kashi")
|
||||
require.NoError(err)
|
||||
assert.Len(c.Keywords, 2)
|
||||
assert.Equal("stupid", c.Keywords[0])
|
||||
assert.Equal("think tank", c.Keywords[1])
|
||||
assert.Equal(c.FromUserHandle, UserHandle("kashi"))
|
||||
}
|
||||
|
||||
func TestSearchStringBadQuotes(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
_, err := persistence.NewCursorFromSearchQuery("asdf \"fjk")
|
||||
require.Error(err)
|
||||
assert.ErrorIs(err, persistence.ErrUnmatchedQuotes)
|
||||
assert.ErrorIs(err, persistence.ErrInvalidQuery)
|
||||
}
|
||||
|
||||
func TestSearchWithDates(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("since:2020-01-01 until:2020-05-01")
|
||||
require.NoError(err)
|
||||
assert.Equal(c.SinceTimestamp.Time, time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
assert.Equal(c.UntilTimestamp.Time, time.Date(2020, 5, 1, 0, 0, 0, 0, time.UTC))
|
||||
}
|
||||
|
||||
func TestSearchWithInvalidDates(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
_, err := persistence.NewCursorFromSearchQuery("since:fawejk")
|
||||
require.Error(err)
|
||||
assert.ErrorIs(err, persistence.ErrInvalidQuery)
|
||||
}
|
||||
|
||||
func TestSearchContentFilters(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
c, err := persistence.NewCursorFromSearchQuery("filter:links filter:videos filter:images filter:polls filter:spaces")
|
||||
require.NoError(err)
|
||||
assert.Equal(c.FilterLinks, persistence.REQUIRE)
|
||||
assert.Equal(c.FilterVideos, persistence.REQUIRE)
|
||||
assert.Equal(c.FilterImages, persistence.REQUIRE)
|
||||
assert.Equal(c.FilterPolls, persistence.REQUIRE)
|
||||
assert.Equal(c.FilterSpaces, persistence.REQUIRE)
|
||||
}
|
@ -3,6 +3,8 @@ package persistence_test
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
@ -130,3 +132,137 @@ func TestTimeline(t *testing.T) {
|
||||
assert.Equal(feed.Items[3].RetweetID, TweetID(144919526660333333))
|
||||
assert.Equal(feed.Items[4].TweetID, TweetID(1413658466795737091))
|
||||
}
|
||||
|
||||
func TestKeywordSearch(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||
require.NoError(err)
|
||||
c := persistence.NewCursor()
|
||||
|
||||
// Multiple words without quotes
|
||||
c.Keywords = []string{"who", "are"}
|
||||
feed, err := profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.True(len(feed.Items) > 1)
|
||||
|
||||
// Add quotes
|
||||
c.Keywords = []string{"who are"}
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 1)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1261483383483293700))
|
||||
|
||||
// With gibberish (no matches)
|
||||
c.Keywords = []string{"fasdfjkafsldfjsff"}
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 0)
|
||||
}
|
||||
|
||||
func TestSearchReplyingToUser(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||
require.NoError(err)
|
||||
c := persistence.NewCursor()
|
||||
|
||||
// Replying to a user
|
||||
c.ToUserHandles = []UserHandle{"spacex"}
|
||||
feed, err := profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 2)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1428951883058753537))
|
||||
assert.Equal(feed.Items[1].TweetID, TweetID(1428939163961790466))
|
||||
|
||||
// Replying to two users
|
||||
c.ToUserHandles = []UserHandle{"spacex", "covfefeanon"}
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 1)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1428939163961790466))
|
||||
}
|
||||
|
||||
func TestSearchDateFilters(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||
require.NoError(err)
|
||||
c := persistence.NewCursor()
|
||||
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||
|
||||
// Since timestamp
|
||||
c.SinceTimestamp.Time = time.Date(2021, 10, 1, 0, 0, 0, 0, time.UTC)
|
||||
c.FromUserHandle = UserHandle("cernovich")
|
||||
feed, err := profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 1)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1453461248142495744))
|
||||
|
||||
// Until timestamp
|
||||
c.SinceTimestamp = TimestampFromUnix(0)
|
||||
c.UntilTimestamp.Time = time.Date(2021, 10, 1, 0, 0, 0, 0, time.UTC)
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 3)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1439027915404939265))
|
||||
assert.Equal(feed.Items[1].TweetID, TweetID(1439068749336748043))
|
||||
assert.Equal(feed.Items[2].TweetID, TweetID(1439067163508150272))
|
||||
}
|
||||
|
||||
func TestSearchMediaFilters(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
profile, err := persistence.LoadProfile("../../sample_data/profile")
|
||||
require.NoError(err)
|
||||
|
||||
// Links
|
||||
c := persistence.NewCursor()
|
||||
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||
c.FilterLinks = persistence.REQUIRE
|
||||
feed, err := profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 2)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1438642143170646017))
|
||||
assert.Equal(feed.Items[1].TweetID, TweetID(1413665734866186243))
|
||||
|
||||
// Images
|
||||
c = persistence.NewCursor()
|
||||
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||
c.FilterImages = persistence.REQUIRE
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 2)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1261483383483293700))
|
||||
assert.Equal(feed.Items[1].TweetID, TweetID(1426669666928414720))
|
||||
|
||||
// Videos
|
||||
c = persistence.NewCursor()
|
||||
c.SortOrder = persistence.SORT_ORDER_MOST_LIKES
|
||||
c.FilterVideos = persistence.REQUIRE
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 2)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1426619468327882761))
|
||||
assert.Equal(feed.Items[1].TweetID, TweetID(1453461248142495744))
|
||||
|
||||
// Polls
|
||||
c = persistence.NewCursor()
|
||||
c.FilterPolls = persistence.REQUIRE
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 1)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1465534109573390348))
|
||||
|
||||
// Spaces
|
||||
c = persistence.NewCursor()
|
||||
c.FilterSpaces = persistence.REQUIRE
|
||||
feed, err = profile.NextPage(c)
|
||||
require.NoError(err)
|
||||
assert.Len(feed.Items, 1)
|
||||
assert.Equal(feed.Items[0].TweetID, TweetID(1624833173514293249))
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user