BUGFIX: fix incorrect text normalization on tweet with multiple images

2021-07-23 19:13:20 -07:00 · 2021-07-23 19:13:20 -07:00 · 6c111ab976
commit 6c111ab976
parent 700e3e0412
3 changed files with 4 additions and 2 deletions
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -56,13 +56,13 @@ type APITweet struct {

 func (t *APITweet) NormalizeContent() {
 	// Remove embedded links at the end of the text
-	if len(t.Entities.URLs) == 1 {
+	if len(t.Entities.URLs) == 1 {  // TODO: should this be `>= 1`, like below?
 		url := t.Entities.URLs[0].URL
 		if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
 			t.FullText = t.FullText[0:len(t.FullText) - len(url)]  // Also strip the newline
 		}
 	}
-	if len(t.Entities.Media) == 1 {
+	if len(t.Entities.Media) >= 1 {
 		url := t.Entities.Media[0].URL
 		if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
 			t.FullText = t.FullText[0:len(t.FullText) - len(url)]  // Also strip the trailing space
--- a/scraper/api_types_test.go
+++ b/scraper/api_types_test.go
@@ -17,6 +17,7 @@ func TestNormalizeContent(t *testing.T) {
 		{"test_responses/tweet_with_gif_reply.json", ""},
 		{"test_responses/tweet_with_image.json", "this saddens me every time"},
 		{"test_responses/tweet_with_reply.json", "I always liked \"The Anarchist's Cookbook.\""},
+		{"test_responses/tweet_with_4_images.json", "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"},
 	}
 	for _, v := range test_cases {
 		data, err := ioutil.ReadFile(v.filename)
--- a/scraper/test_responses/tweet_with_4_images.json
+++ b/scraper/test_responses/tweet_with_4_images.json