Unescape HTML entities in tweet text
This commit is contained in:
parent
8af6b6d3a6
commit
bf2dbede94
@@ -2,6 +2,7 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"html"
|
||||||
"time"
|
"time"
|
||||||
"strings"
|
"strings"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@@ -194,6 +195,7 @@ func (t *APITweet) NormalizeContent() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
t.FullText = html.UnescapeString(t.FullText)
|
||||||
t.FullText = strings.TrimSpace(t.FullText)
|
t.FullText = strings.TrimSpace(t.FullText)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -27,6 +27,7 @@ func TestNormalizeContent(t *testing.T) {
|
|||||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link.json", "", 1422680899670274048, 0, 0, ""},
|
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link.json", "", 1422680899670274048, 0, 0, ""},
|
||||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json", "sometimes they're too dimwitted to even get the wrong title right", 1396194494710788100, 1395882872729477131, 0, ""},
|
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json", "sometimes they're too dimwitted to even get the wrong title right", 1396194494710788100, 1395882872729477131, 0, ""},
|
||||||
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link3.json", "I was using an analogy about creating out-groups but the Germans sure love their literalism", 1442092399358930946, 1335678942020300802, 0, ""},
|
{"test_responses/single_tweets/tweet_with_quoted_tweet_as_link3.json", "I was using an analogy about creating out-groups but the Germans sure love their literalism", 1442092399358930946, 1335678942020300802, 0, ""},
|
||||||
|
{"test_responses/single_tweets/tweet_with_html_entities.json", "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets", 0, 0, 0, ""},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, v := range test_cases {
|
for _, v := range test_cases {
|
||||||
|
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"created_at": "Thu Dec 23 20:55:48 +0000 2021",
|
||||||
|
"id_str": "1474121585510563845",
|
||||||
|
"full_text": "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs &amp; ruined many innocents. Only when the USSR fell was it discovered that they &amp; other spies had given away ALL the secrets",
|
||||||
|
"display_text_range":
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
288
|
||||||
|
],
|
||||||
|
"entities":
|
||||||
|
{},
|
||||||
|
"source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>",
|
||||||
|
"user_id_str": "1239676915386068993",
|
||||||
|
"retweet_count": 239,
|
||||||
|
"favorite_count": 1118,
|
||||||
|
"reply_count": 26,
|
||||||
|
"quote_count": 26,
|
||||||
|
"conversation_id_str": "1474121585510563845",
|
||||||
|
"lang": "en"
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user