Unescape HTML entities in tweet text
This commit is contained in:
parent
8af6b6d3a6
commit
bf2dbede94
@ -2,6 +2,7 @@ package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"time"
|
||||
"strings"
|
||||
"encoding/json"
|
||||
@ -194,6 +195,7 @@ func (t *APITweet) NormalizeContent() {
|
||||
}
|
||||
}
|
||||
}
|
||||
t.FullText = html.UnescapeString(t.FullText)
|
||||
t.FullText = strings.TrimSpace(t.FullText)
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@ func TestNormalizeContent(t *testing.T) {
|
||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link.json", "", 1422680899670274048, 0, 0, ""},
|
||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json", "sometimes they're too dimwitted to even get the wrong title right", 1396194494710788100, 1395882872729477131, 0, ""},
|
||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link3.json", "I was using an analogy about creating out-groups but the Germans sure love their literalism", 1442092399358930946, 1335678942020300802, 0, ""},
|
||||
{"test_responses/single_tweets/tweet_with_html_entities.json", "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets", 0, 0, 0, ""},
|
||||
}
|
||||
|
||||
for _, v := range test_cases {
|
||||
|
@ -0,0 +1,20 @@
|
||||
{
|
||||
"created_at": "Thu Dec 23 20:55:48 +0000 2021",
|
||||
"id_str": "1474121585510563845",
|
||||
"full_text": "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets",
|
||||
"display_text_range":
|
||||
[
|
||||
0,
|
||||
288
|
||||
],
|
||||
"entities":
|
||||
{},
|
||||
"source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
|
||||
"user_id_str": "1239676915386068993",
|
||||
"retweet_count": 239,
|
||||
"favorite_count": 1118,
|
||||
"reply_count": 26,
|
||||
"quote_count": 26,
|
||||
"conversation_id_str": "1474121585510563845",
|
||||
"lang": "en"
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user