BUGFIX: fix incorrect text normalization on tweets with multiple images
This commit is contained in:
parent
700e3e0412
commit
6c111ab976
@ -56,13 +56,13 @@ type APITweet struct {
|
||||
|
||||
func (t *APITweet) NormalizeContent() {
|
||||
// Remove embedded links at the end of the text
|
||||
if len(t.Entities.URLs) == 1 {
|
||||
if len(t.Entities.URLs) == 1 { // TODO: should this be `>= 1`, like below?
|
||||
url := t.Entities.URLs[0].URL
|
||||
if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
|
||||
t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the newline
|
||||
}
|
||||
}
|
||||
if len(t.Entities.Media) == 1 {
|
||||
if len(t.Entities.Media) >= 1 {
|
||||
url := t.Entities.Media[0].URL
|
||||
if strings.Index(t.FullText, url) == len(t.FullText) - len(url) {
|
||||
t.FullText = t.FullText[0:len(t.FullText) - len(url)] // Also strip the trailing space
|
||||
|
@ -17,6 +17,7 @@ func TestNormalizeContent(t *testing.T) {
|
||||
{"test_responses/tweet_with_gif_reply.json", ""},
|
||||
{"test_responses/tweet_with_image.json", "this saddens me every time"},
|
||||
{"test_responses/tweet_with_reply.json", "I always liked \"The Anarchist's Cookbook.\""},
|
||||
{"test_responses/tweet_with_4_images.json", "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"},
|
||||
}
|
||||
for _, v := range test_cases {
|
||||
data, err := ioutil.ReadFile(v.filename)
|
||||
|
1
scraper/test_responses/tweet_with_4_images.json
Normal file
1
scraper/test_responses/tweet_with_4_images.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user