diff --git a/scraper/api_types.go b/scraper/api_types.go index 903898f..3940e49 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -2,6 +2,7 @@ package scraper import ( "fmt" + "html" "time" "strings" "encoding/json" @@ -194,6 +195,7 @@ func (t *APITweet) NormalizeContent() { } } } + t.FullText = html.UnescapeString(t.FullText) t.FullText = strings.TrimSpace(t.FullText) } diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go index 38d5c68..432300e 100644 --- a/scraper/api_types_test.go +++ b/scraper/api_types_test.go @@ -27,6 +27,7 @@ func TestNormalizeContent(t *testing.T) { {"test_responses/single_tweets/tweet_with_quoted_tweet_as_link.json", "", 1422680899670274048, 0, 0, ""}, {"test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json", "sometimes they're too dimwitted to even get the wrong title right", 1396194494710788100, 1395882872729477131, 0, ""}, {"test_responses/single_tweets/tweet_with_quoted_tweet_as_link3.json", "I was using an analogy about creating out-groups but the Germans sure love their literalism", 1442092399358930946, 1335678942020300802, 0, ""}, + {"test_responses/single_tweets/tweet_with_html_entities.json", "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. 
Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets", 0, 0, 0, ""}, } for _, v := range test_cases { diff --git a/scraper/test_responses/single_tweets/tweet_with_html_entities.json b/scraper/test_responses/single_tweets/tweet_with_html_entities.json new file mode 100644 index 0000000..43ecb6d --- /dev/null +++ b/scraper/test_responses/single_tweets/tweet_with_html_entities.json @@ -0,0 +1,20 @@ +{ + "created_at": "Thu Dec 23 20:55:48 +0000 2021", + "id_str": "1474121585510563845", + "full_text": "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs &amp; ruined many innocents. Only when the USSR fell was it discovered that they &amp; other spies had given away ALL the secrets", + "display_text_range": + [ + 0, + 288 + ], + "entities": + {}, + "source": "Twitter Web App", + "user_id_str": "1239676915386068993", + "retweet_count": 239, + "favorite_count": 1118, + "reply_count": 26, + "quote_count": 26, + "conversation_id_str": "1474121585510563845", + "lang": "en" +}