BUGFIX: fix incorrect text normalization on tweets with multiple images

This commit is contained in:
Alessio 2021-07-23 19:13:20 -07:00
parent 700e3e0412
commit 6c111ab976
3 changed files with 4 additions and 2 deletions

View File

@@ -56,13 +56,13 @@ type APITweet struct {
func (t *APITweet) NormalizeContent() {
// Remove embedded links at the end of the text
if len(t.Entities.URLs) == 1 {
if len(t.Entities.URLs) == 1 { // TODO: should this be `>= 1`, like below?
url := t.Entities.URLs[0].URL
if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the newline
}
}
if len(t.Entities.Media) == 1 {
if len(t.Entities.Media) >= 1 {
url := t.Entities.Media[0].URL
if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the trailing space

View File

@@ -17,6 +17,7 @@ func TestNormalizeContent(t *testing.T) {
{"test_responses/tweet_with_gif_reply.json", ""},
{"test_responses/tweet_with_image.json", "this saddens me every time"},
{"test_responses/tweet_with_reply.json", "I always liked \"The Anarchist's Cookbook.\""},
{"test_responses/tweet_with_4_images.json", "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"},
}
for _, v := range test_cases {
data, err := ioutil.ReadFile(v.filename)

File diff suppressed because one or more lines are too long