From 8608f06bca68c0da2e4c8586d9cad67cc0d64f84 Mon Sep 17 00:00:00 2001
From: Alessio
Date: Sun, 27 Aug 2023 15:33:22 -0300
Subject: [PATCH] Fix entity recognition to avoid matching email

Only recognize a mention or hashtag when it starts the text or directly
follows whitespace, so the "@somedomain" part of an email address is left
as plain text. The whitespace byte consumed by the pattern is excluded
from the entity, whichever whitespace character it is.

---
 internal/webserver/helpers_test.go     | 12 ++++++++++++
 internal/webserver/response_helpers.go |  6 +++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/internal/webserver/helpers_test.go b/internal/webserver/helpers_test.go
index 37ef41f..b68ac52 100644
--- a/internal/webserver/helpers_test.go
+++ b/internal/webserver/helpers_test.go
@@ -44,3 +44,15 @@ func TestGetEntitiesHashtagAndMention(t *testing.T) {
 	assert.Equal(entities[4].EntityType, ENTITY_TYPE_TEXT)
 	assert.Equal(entities[4].Contents, " in it")
 }
+
+func TestGetEntitiesNoMatchEmail(t *testing.T) {
+	assert := assert.New(t)
+	require := require.New(t)
+
+	s := "My email is somebody@somedomain.com"
+	entities := get_entities(s)
+
+	require.Len(entities, 1)
+	assert.Equal(entities[0].EntityType, ENTITY_TYPE_TEXT)
+	assert.Equal(entities[0].Contents, s)
+}
diff --git a/internal/webserver/response_helpers.go b/internal/webserver/response_helpers.go
index 12994c9..2961a5a 100644
--- a/internal/webserver/response_helpers.go
+++ b/internal/webserver/response_helpers.go
@@ -167,7 +167,11 @@ type Entity struct {
 func get_entities(text string) []Entity {
 	ret := []Entity{}
 	start := 0
-	for _, idxs := range regexp.MustCompile(`[@#]\w+`).FindAllStringIndex(text, -1) {
+	for _, idxs := range regexp.MustCompile(`(\s|^)[@#]\w+`).FindAllStringIndex(text, -1) {
+		// A match begins at start-of-text or with one whitespace byte (any `\s`: space, tab, newline, ...); requiring that avoids matching emails. Skip that byte so the entity starts at the '@' or '#'.
+		if c := text[idxs[0]]; c != '@' && c != '#' {
+			idxs[0] += 1
+		}
 		if start != idxs[0] {
 			ret = append(ret, Entity{ENTITY_TYPE_TEXT, text[start:idxs[0]]})
 		}