offline-twitter/scraper/tweet_test.go

327 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package scraper_test
import (
"encoding/json"
"io/ioutil"
"testing"
"offline_twitter/scraper"
)
// load_tweet_from_file reads the JSON fixture at `filename`, decodes it into a
// scraper.APITweet, and converts that into a scraper.Tweet with
// scraper.ParseSingleTweet.
//
// It is a test helper: a failure in any of the three steps (read, decode,
// parse) panics with the underlying error instead of being returned.
func load_tweet_from_file(filename string) scraper.Tweet {
	raw, read_err := ioutil.ReadFile(filename)
	if read_err != nil {
		panic(read_err)
	}

	var decoded scraper.APITweet
	if decode_err := json.Unmarshal(raw, &decoded); decode_err != nil {
		panic(decode_err)
	}

	parsed, parse_err := scraper.ParseSingleTweet(decoded)
	if parse_err != nil {
		panic(parse_err)
	}
	return parsed
}
// TestParseSingleTweet loads the "tweet_with_unicode_chars" fixture and checks
// the parsed text, the single mention, the absence of URLs, the posting
// timestamp, and that neither a quoted tweet nor any polls are reported.
func TestParseSingleTweet(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")

	const want_text = "The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the largest white pill Ive swallowed in years."
	if got_text := tweet.Text; got_text != want_text {
		t.Errorf("Expected: %q; got %q", want_text, got_text)
	}

	// Exactly one mention is expected; the length test comes first so the
	// index access is never evaluated on an empty slice.
	mentions_ok := len(tweet.Mentions) == 1 && tweet.Mentions[0] == "michaelmalice"
	if !mentions_ok {
		t.Errorf("Expected %v, got %v", []string{"michaelmalice"}, tweet.Mentions)
	}

	if url_count := len(tweet.Urls); url_count != 0 {
		t.Errorf("Expected %d urls, but got %d", 0, url_count)
	}

	if posted_unix := tweet.PostedAt.Unix(); posted_unix != 1621639105 {
		t.Errorf("Expected %d, got %d", 1621639105, posted_unix)
	}

	if quoted_id := tweet.QuotedTweetID; quoted_id != 0 {
		t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", quoted_id)
	}

	if len(tweet.Polls) > 0 {
		t.Errorf("Should not have any polls")
	}
}
// TestParseTweetWithImage loads the "tweet_with_image" fixture and checks the
// parsed text and that exactly one image was attached to the tweet.
func TestParseTweetWithImage(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json")

	if want_text := "this saddens me every time"; tweet.Text != want_text {
		t.Errorf("Expected: %q, got: %q", want_text, tweet.Text)
	}

	if image_count := len(tweet.Images); image_count != 1 {
		t.Errorf("Expected 1 images but got %d", image_count)
	}
}
// TestParseTweetWithQuotedTweetAsLink loads the
// "tweet_with_quoted_tweet_as_link2" fixture and checks the parsed text, the
// ID of the tweet being replied to, that there are no reply-mentions, the ID
// of the quoted tweet, and that no polls are reported.
func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json")

	expected_text := "sometimes they're too dimwitted to even get the wrong title right"
	if tweet.Text != expected_text {
		t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text)
	}

	expected_replied_id := scraper.TweetID(1395882872729477131)
	if tweet.InReplyToID != expected_replied_id {
		// Tweet IDs are integers, so they are printed with %d; %q would render
		// them as a quoted character literal rather than as the numeric ID.
		t.Errorf("Expected %d, got %d", expected_replied_id, tweet.InReplyToID)
	}

	if len(tweet.ReplyMentions) != 0 {
		t.Errorf("Wanted %v, got %v", []string{}, tweet.ReplyMentions)
	}

	expected_quoted_id := scraper.TweetID(1396194494710788100)
	if tweet.QuotedTweetID != expected_quoted_id {
		t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID)
	}

	if len(tweet.Polls) != 0 {
		t.Errorf("Should not have any polls")
	}
}
// TestParseTweetWithVideo loads the "tweet_with_video" fixture and checks that
// it has exactly one video with the expected remote URL, that the video is not
// flagged as a gif, and that no images are attached.
func TestParseTweetWithVideo(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json")
	expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"

	// Stop here if the count is wrong: the checks below index Videos[0], which
	// would panic on an empty slice instead of reporting a test failure.
	if len(tweet.Videos) != 1 {
		t.Fatalf("Expected 1 video, but got %d: %+v", len(tweet.Videos), tweet.Videos)
	}

	video := tweet.Videos[0]
	if video.RemoteURL != expected_video {
		t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos)
	}
	if video.IsGif {
		t.Errorf("Expected it to be a regular video, but it was a gif")
	}

	if len(tweet.Images) != 0 {
		t.Errorf("Should not have any images, but has %d", len(tweet.Images))
	}
}
// TestParseTweetWithGif loads the "tweet_that_is_a_reply_with_gif" fixture and
// checks that it has exactly one video, that the video has the expected remote
// URL, and that it is flagged as a gif.
func TestParseTweetWithGif(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json")
	expected_video := "https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4"

	// Fatal rather than Error: the checks below index Videos[0], which would
	// panic on an empty slice instead of reporting a test failure.
	if len(tweet.Videos) != 1 {
		t.Fatalf("Expected 1 video (a gif), but got %d instead", len(tweet.Videos))
	}

	video := tweet.Videos[0]
	if video.RemoteURL != expected_video {
		t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos)
	}
	if !video.IsGif {
		t.Errorf("Expected video to be a gif, but it wasn't")
	}
}
// TestParseTweetWithUrl loads the "tweet_with_url_card" fixture and checks that
// it has exactly one URL with the expected text and domain, that the URL is
// reported as having a card, and that no polls are reported.
func TestParseTweetWithUrl(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json")

	// Fatal rather than Error: the checks below index Urls[0], which would
	// panic on an empty slice instead of reporting a test failure.
	if len(tweet.Urls) != 1 {
		t.Fatalf("Expected %d urls, but got %d", 1, len(tweet.Urls))
	}
	url := tweet.Urls[0]

	expected_url_text := "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/"
	if url.Text != expected_url_text {
		t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, url.Text)
	}

	if !url.HasCard {
		t.Errorf("Expected it to have a card, but it doesn't")
	}

	expected_url_domain := "reason.com"
	if url.Domain != expected_url_domain {
		// This check is about the domain, so the message names the domain
		// (it previously repeated the "Url text" wording of the check above).
		t.Errorf("Expected Url domain to be %q, but got %q", expected_url_domain, url.Domain)
	}

	if len(tweet.Polls) != 0 {
		t.Errorf("Should not have any polls")
	}
}
// TestParseTweetWithUrlButNoCard loads the "tweet_with_url_but_no_card" fixture
// and checks that it has exactly one URL with the expected text, and that the
// URL is not reported as having a card.
func TestParseTweetWithUrlButNoCard(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json")

	// Fatal rather than Error: the checks below index Urls[0], which would
	// panic on an empty slice instead of reporting a test failure.
	if len(tweet.Urls) != 1 {
		t.Fatalf("Expected %d urls, but got %d", 1, len(tweet.Urls))
	}
	url := tweet.Urls[0]

	expected_url_text := "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364"
	if url.Text != expected_url_text {
		t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, url.Text)
	}

	if url.HasCard {
		t.Errorf("Expected url not to have a card, but it thinks it has one")
	}
}
// TestParseTweetWithMultipleUrls loads the "tweet_with_multiple_urls" fixture
// and checks that it has three URLs, that only the third one is reported as
// having a card, that the third URL's title matches, and that no polls are
// reported.
func TestParseTweetWithMultipleUrls(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json")

	// Fatal rather than Error: the checks below index Urls[0..2], which would
	// panic on a shorter slice instead of reporting a test failure.
	if len(tweet.Urls) != 3 {
		t.Fatalf("Expected %d urls, got %d instead", 3, len(tweet.Urls))
	}

	// Expected HasCard value for each URL, by position.
	for i, want_card := range []bool{false, false, true} {
		has_card := tweet.Urls[i].HasCard
		switch {
		case has_card && !want_card:
			t.Errorf("Expected url not to have a card, but it does: %d", i)
		case !has_card && want_card:
			t.Errorf("Expected url to have a card, but it doesn't: %d", i)
		}
	}

	expected_title := "Bidens victory came from the suburbs"
	if tweet.Urls[2].Title != expected_title {
		t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title)
	}

	if len(tweet.Polls) != 0 {
		t.Errorf("Should not have any polls")
	}
}
// TestTweetWithLotsOfReplyMentions loads the "tweet_with_at_mentions_in_front"
// fixture and checks that it has four reply-mentions, in the expected order.
func TestTweetWithLotsOfReplyMentions(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")

	expected_mentions := []scraper.UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"}

	// Fatal rather than Error: the loop below indexes ReplyMentions by the
	// position in expected_mentions, which would panic on a shorter slice
	// instead of reporting a test failure.
	if len(tweet.ReplyMentions) != len(expected_mentions) {
		t.Fatalf("Expected %d reply-mentions, got %d", len(expected_mentions), len(tweet.ReplyMentions))
	}

	for i, v := range expected_mentions {
		if tweet.ReplyMentions[i] != v {
			t.Errorf("Expected %q, got %q at position %d", v, tweet.ReplyMentions[i], i)
		}
	}
}
// TestTweetWithPoll loads the "tweet_with_poll_4_choices" fixture and checks
// every field of its single poll: the owning tweet ID, the number of choices,
// each choice's label and vote count, the voting duration, and the
// voting-end and last-updated timestamps.
func TestTweetWithPoll(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")

	// Abort early if there is not exactly one poll; everything below reads Polls[0].
	poll_count := len(tweet.Polls)
	if poll_count != 1 {
		t.Fatalf("Expected there to be 1 poll, but there was %d", poll_count)
	}
	poll := tweet.Polls[0]

	if poll.TweetID != tweet.ID {
		t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", poll.TweetID, tweet.ID)
	}
	if poll.NumChoices != 4 {
		t.Errorf("Expected %d choices, got %d instead", 4, poll.NumChoices)
	}

	// Choice labels.
	if want := "Tribal armband"; poll.Choice1 != want {
		t.Errorf("Expected choice1 %q, got %q", want, poll.Choice1)
	}
	if want := "Marijuana leaf"; poll.Choice2 != want {
		t.Errorf("Expected choice2 %q, got %q", want, poll.Choice2)
	}
	if want := "Butterfly"; poll.Choice3 != want {
		t.Errorf("Expected choice3 %q, got %q", want, poll.Choice3)
	}
	if want := "Maple leaf"; poll.Choice4 != want {
		t.Errorf("Expected choice4 %q, got %q", want, poll.Choice4)
	}

	// Vote counts per choice.
	if want := 1593; poll.Choice1_Votes != want {
		t.Errorf("Expected Choice1_Votes %d, got %d", want, poll.Choice1_Votes)
	}
	if want := 624; poll.Choice2_Votes != want {
		t.Errorf("Expected Choice2_Votes %d, got %d", want, poll.Choice2_Votes)
	}
	if want := 778; poll.Choice3_Votes != want {
		t.Errorf("Expected Choice3_Votes %d, got %d", want, poll.Choice3_Votes)
	}
	if want := 1138; poll.Choice4_Votes != want {
		t.Errorf("Expected Choice4_Votes %d, got %d", want, poll.Choice4_Votes)
	}

	// The duration is compared in seconds (1440 * 60).
	if want := 1440 * 60; poll.VotingDuration != want {
		t.Errorf("Expected voting duration %d seconds, got %d", want, poll.VotingDuration)
	}

	// Timestamps are compared as Unix seconds.
	if got, want := poll.VotingEndsAt.Unix(), int64(1638331934); got != want {
		t.Errorf("Expected voting ends at %d (unix), got %d", want, got)
	}
	if got, want := poll.LastUpdatedAt.Unix(), int64(1638331935); got != want {
		t.Errorf("Expected updated %d, got %d", want, got)
	}
}
// TestParseTweetResponse decodes the "michael_malice_feed" fixture into a
// scraper.TweetResponse, runs scraper.ParseTweetResponse on it, and checks the
// number of tweets, retweets and users that come back.
func TestParseTweetResponse(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")
	if err != nil {
		// Fail this test instead of panicking the whole test binary.
		t.Fatal(err)
	}

	var tweet_resp scraper.TweetResponse
	if err = json.Unmarshal(data, &tweet_resp); err != nil {
		// t.Fatal(err) rather than t.Errorf(err.Error()): the error text must
		// not be used as a format string, and there is nothing meaningful to
		// parse once decoding has failed.
		t.Fatal(err)
	}

	tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp)
	if err != nil {
		t.Fatal(err)
	}

	// NOTE(review): 29-3 presumably means 29 feed entries minus the 3
	// retweets counted below -- confirm against the fixture.
	if len(tweets) != 29-3 {
		t.Errorf("Expected %d tweets, got %d", 29-3, len(tweets))
	}
	if len(retweets) != 3 {
		t.Errorf("Expected %d retweets, got %d", 3, len(retweets))
	}
	if len(users) != 9 {
		t.Errorf("Expected %d users, got %d", 9, len(users))
	}
}
// TestParseTweetResponseWithTombstones decodes the "tombstone_deleted" fixture
// into a scraper.TweetResponse, calls HandleTombstones on it and checks that
// one extra user is reported, then runs scraper.ParseTweetResponse and checks
// the number of tweets, retweets and users that come back.
func TestParseTweetResponseWithTombstones(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
	if err != nil {
		// Fail this test instead of panicking the whole test binary.
		t.Fatal(err)
	}

	var tweet_resp scraper.TweetResponse
	if err = json.Unmarshal(data, &tweet_resp); err != nil {
		// t.Fatal(err) rather than t.Errorf(err.Error()): the error text must
		// not be used as a format string, and there is nothing meaningful to
		// process once decoding has failed.
		t.Fatal(err)
	}

	// HandleTombstones must run before ParseTweetResponse, as in the original
	// ordering of this test.
	extra_users := tweet_resp.HandleTombstones()
	if len(extra_users) != 1 {
		t.Errorf("Expected to need 1 extra user but got %d instead", len(extra_users))
	}

	tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp)
	if err != nil {
		t.Fatal(err)
	}

	if len(tweets) != 2 {
		t.Errorf("Expected %d tweets, got %d", 2, len(tweets))
	}
	if len(retweets) != 0 {
		t.Errorf("Expected %d retweets, got %d", 0, len(retweets))
	}
	if len(users) != 1 {
		t.Errorf("Expected %d users, got %d", 1, len(users))
	}
}