From 069ab83fba3ae62d872d96585d33fb0a7a005266 Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 31 Jan 2022 19:14:14 -0800 Subject: [PATCH] Refactor scraper tests - use Stretchr assert package for assertions - use "dot imports" to remove "scraper.XYZ" from everything --- scraper/api_types_test.go | 190 +++++++------------- scraper/api_types_v2_test.go | 4 +- scraper/guest_token_test.go | 12 +- scraper/image_test.go | 43 ++--- scraper/link_expander_test.go | 9 +- scraper/poll_test.go | 111 +++++------- scraper/retweet_test.go | 39 ++--- scraper/tweet_test.go | 319 ++++++++++------------------------ scraper/url_test.go | 194 ++++++--------------- scraper/user_test.go | 183 ++++++------------- scraper/video_test.go | 72 +++----- 11 files changed, 362 insertions(+), 814 deletions(-) diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go index 432300e..afbcfd0 100644 --- a/scraper/api_types_test.go +++ b/scraper/api_types_test.go @@ -5,17 +5,21 @@ import ( "io/ioutil" "encoding/json" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestNormalizeContent(t *testing.T) { + assert := assert.New(t) test_cases := []struct { filename string eventual_full_text string - quoted_status_id scraper.TweetID - in_reply_to_id scraper.TweetID - retweeted_status_id scraper.TweetID + quoted_status_id TweetID + in_reply_to_id TweetID + retweeted_status_id TweetID reply_mentions string } { {"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"}, @@ -35,77 +39,53 @@ func TestNormalizeContent(t *testing.T) { if err != nil { panic(err) } - var tweet scraper.APITweet + var tweet APITweet err = json.Unmarshal(data, &tweet) - if err != nil { - println("Failed at " + v.filename) - t.Errorf(err.Error()) - } + assert.NoError(err, "Failed at " + v.filename) tweet.NormalizeContent() - if tweet.FullText != v.eventual_full_text { - t.Errorf("Expected %q, got %q", v.eventual_full_text, tweet.FullText) - } - if scraper.TweetID(tweet.QuotedStatusID) != v.quoted_status_id { - t.Errorf("Expected quoted status %d, but got %d", v.quoted_status_id, tweet.QuotedStatusID) - } - if scraper.TweetID(tweet.InReplyToStatusID) != v.in_reply_to_id { - t.Errorf("Expected in_reply_to_id id %d, but got %d", v.in_reply_to_id, tweet.InReplyToStatusID) - } - if scraper.TweetID(tweet.RetweetedStatusID) != v.retweeted_status_id { - t.Errorf("Expected retweeted status id %d, but got %d", v.retweeted_status_id, tweet.RetweetedStatusID) - } - if tweet.Entities.ReplyMentions != v.reply_mentions { - t.Errorf("Expected @reply mentions to be %q, but it was %q", v.reply_mentions, tweet.Entities.ReplyMentions) - } + assert.Equal(v.eventual_full_text, tweet.FullText, "Tweet text") + assert.Equal(int64(v.quoted_status_id), tweet.QuotedStatusID, "Quoted status ID") + assert.Equal(int64(v.in_reply_to_id), tweet.InReplyToStatusID, "In reply to ID") + assert.Equal(int64(v.retweeted_status_id), tweet.RetweetedStatusID, "Retweeted status ID") + assert.Equal(v.reply_mentions, tweet.Entities.ReplyMentions, "Reply mentions") } } func TestUserProfileToAPIUser(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json") if err != nil { panic(err) } - var user_resp scraper.UserResponse + var user_resp UserResponse err = json.Unmarshal(data, &user_resp) - if err != nil { - t.Errorf(err.Error()) - } + assert.NoError(err) result := user_resp.ConvertToAPIUser() - - if result.ID != 44067298 { - t.Errorf("Expected ID %q, got %q", 44067298, result.ID) - } - if result.FollowersCount != user_resp.Data.User.Legacy.FollowersCount { - t.Errorf("Expected user count %d, got %d", user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) - } + assert.Equal(int64(44067298), result.ID) + assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) } func TestGetCursor(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/midriffs_anarchist_cookbook.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Errorf(err.Error()) - } + assert.NoError(err) - expected_cursor := "LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=" - actual_cursor := tweet_resp.GetCursor() - - if expected_cursor != actual_cursor { - t.Errorf("Expected %q, got %q", expected_cursor, actual_cursor) - } + assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=", tweet_resp.GetCursor()) } func TestIsEndOfFeed(t *testing.T) { + assert := assert.New(t) test_cases := []struct { filename string is_end_of_feed bool @@ -118,130 +98,86 @@ func TestIsEndOfFeed(t *testing.T) { if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Fatalf(err.Error()) - } - result := tweet_resp.IsEndOfFeed() - if v.is_end_of_feed != result { - t.Errorf("Expected IsEndOfFeed to be %v, but got %v", v.is_end_of_feed, result) - } + assert.NoError(err) + assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed()) } } func TestHandleTombstonesHidden(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Fatalf(err.Error()) - } - if len(tweet_resp.GlobalObjects.Tweets) != 2 { - t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) - } + require.NoError(t, err) + assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") + tweet_resp.HandleTombstones() - if len(tweet_resp.GlobalObjects.Tweets) != 4 { - t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets)) - } + + assert.Equal(4, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling") first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"] - if !ok { - t.Errorf("Missing tombstoned tweet for %s", "1454522147750260742") - } - if first_tombstone.ID != 1454522147750260742 { - t.Errorf("Expected ID %d, got %d instead", 1454522147750260742, first_tombstone.ID) - } - if first_tombstone.UserID != 1365863538393309184 { - t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, first_tombstone.UserID) - } - if first_tombstone.TombstoneText != "hidden" { - t.Errorf("Wrong tombstone text: %s", first_tombstone.TombstoneText) + if assert.True(ok, "Missing tombstone") { + assert.Equal(int64(1454522147750260742), first_tombstone.ID) + assert.Equal(int64(1365863538393309184), first_tombstone.UserID) + assert.Equal("hidden", first_tombstone.TombstoneText) } second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"] - if !ok { - t.Errorf("Missing tombstoned tweet for %s", "1454515503242829830") - } - if second_tombstone.ID != 1454515503242829830 { - t.Errorf("Expected ID %d, got %d instead", 1454515503242829830, second_tombstone.ID) - } - if second_tombstone.UserID != 1365863538393309184 { - t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, second_tombstone.UserID) - } - if second_tombstone.TombstoneText != "hidden" { - t.Errorf("Wrong tombstone text: %s", second_tombstone.TombstoneText) + if assert.True(ok, "Missing tombstone") { + assert.Equal(int64(1454515503242829830), second_tombstone.ID) + assert.Equal(int64(1365863538393309184), second_tombstone.UserID) + assert.Equal("hidden", second_tombstone.TombstoneText) } } func TestHandleTombstonesDeleted(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Fatalf(err.Error()) - } + require.NoError(t, err) + assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") - if len(tweet_resp.GlobalObjects.Tweets) != 1 { - t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets)) - } tweet_resp.HandleTombstones() - if len(tweet_resp.GlobalObjects.Tweets) != 2 { - t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) - } + + assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling") tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"] - if !ok { - t.Errorf("Missing tombstoned tweet for %s", "1454521654781136902") - } - if tombstone.ID != 1454521654781136902 { - t.Errorf("Expected ID %d, got %d instead", 1454521654781136902, tombstone.ID) - } - if tombstone.UserID != 1218687933391298560 { - t.Errorf("Expected UserID %d, got %d instead", 1218687933391298560, tombstone.UserID) - } - if tombstone.TombstoneText != "deleted" { - t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText) + if assert.True(ok) { + assert.Equal(int64(1454521654781136902), tombstone.ID) + assert.Equal(int64(1218687933391298560), tombstone.UserID) + assert.Equal("deleted", tombstone.TombstoneText) } } func TestHandleTombstonesUnavailable(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_unavailable.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Fatalf(err.Error()) - } + require.NoError(t, err) + assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling") - if len(tweet_resp.GlobalObjects.Tweets) != 2 { - t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) - } tweet_resp.HandleTombstones() - if len(tweet_resp.GlobalObjects.Tweets) != 3 { - t.Errorf("Should have ended up with %d tweets, but had %d instead", 3, len(tweet_resp.GlobalObjects.Tweets)) - } + + assert.Equal(3, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling") tombstone, ok := tweet_resp.GlobalObjects.Tweets["1452686887651532809"] - if !ok { - t.Errorf("Missing tombstoned tweet for %s", "1452686887651532809") - } - if tombstone.ID != 1452686887651532809 { - t.Errorf("Expected ID %d, got %d instead", 1452686887651532809, tombstone.ID) - } - if tombstone.UserID != 1241389617502445569 { - t.Errorf("Expected UserID %d, got %d instead", 1241389617502445569, tombstone.UserID) - } - if tombstone.TombstoneText != "unavailable" { - t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText) + if assert.True(ok) { + assert.Equal(int64(1452686887651532809), tombstone.ID) + assert.Equal(int64(1241389617502445569), tombstone.UserID) + assert.Equal("unavailable", tombstone.TombstoneText) } } diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index c70647a..e2657b7 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -6,10 +6,10 @@ import ( "encoding/json" "fmt" - . "offline_twitter/scraper" "github.com/stretchr/testify/assert" -) + . "offline_twitter/scraper" +) /** * Parse an APIV2User diff --git a/scraper/guest_token_test.go b/scraper/guest_token_test.go index 952831a..baeb76c 100644 --- a/scraper/guest_token_test.go +++ b/scraper/guest_token_test.go @@ -1,12 +1,14 @@ package scraper_test -import "testing" -import "fmt" -import "offline_twitter/scraper" +import ( + "testing" + "fmt" + . "offline_twitter/scraper" +) // Makes an HTTP request func TestGetGuestToken(t *testing.T) { - token, err := scraper.GetGuestToken() + token, err := GetGuestToken() if err != nil { t.Errorf("%v", err) } @@ -22,6 +24,6 @@ func TestGetGuestToken(t *testing.T) { // other than the first use the cache. func BenchmarkGetGuestToken(b *testing.B) { for i := 0; i < b.N; i++ { - _, _ = scraper.GetGuestToken() + _, _ = GetGuestToken() } } diff --git a/scraper/image_test.go b/scraper/image_test.go index 216ccdb..cb15fee 100644 --- a/scraper/image_test.go +++ b/scraper/image_test.go @@ -5,42 +5,27 @@ import ( "io/ioutil" "encoding/json" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestParseAPIMedia(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/image.json") if err != nil { panic(err) } - var apimedia scraper.APIMedia + var apimedia APIMedia err = json.Unmarshal(data, &apimedia) - if err != nil { - t.Fatal(err.Error()) - } - image := scraper.ParseAPIMedia(apimedia) + require.NoError(t, err) - expected_id := 1395882862289772553 - if image.ID != scraper.ImageID(expected_id) { - t.Errorf("Expected ID of %q, got %q", expected_id, image.ID) - } - expected_remote_url := "https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg" - if image.RemoteURL != expected_remote_url { - t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL) - } - expected_width := 593 - if image.Width != expected_width { - t.Errorf("Expected width %d, got %d", expected_width, image.Width) - } - expected_height := 239 - if image.Height != expected_height { - t.Errorf("Expected height %d, got %d", expected_height, image.Height) - } - expected_local_filename := "E18sEUrWYAk8dBl.jpg" - if image.LocalFilename != expected_local_filename { - t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename) - } - if image.IsDownloaded { - t.Errorf("Expected it not to be downloaded, but it was") - } + image := ParseAPIMedia(apimedia) + assert.Equal(ImageID(1395882862289772553), image.ID) + assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL) + assert.Equal(593, image.Width) + assert.Equal(239, image.Height) + assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename) + assert.False(image.IsDownloaded) } diff --git a/scraper/link_expander_test.go b/scraper/link_expander_test.go index ffdc1c0..68e9f7b 100644 --- a/scraper/link_expander_test.go +++ b/scraper/link_expander_test.go @@ -6,7 +6,9 @@ import ( "net/http" "net/http/httptest" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + + . "offline_twitter/scraper" ) @@ -18,8 +20,5 @@ func TestExpandShortUrl(t *testing.T) { })) defer srvr.Close() - result := scraper.ExpandShortUrl(srvr.URL) - if result != redirecting_to { - t.Errorf("Expected %q, got %q", redirecting_to, result) - } + assert.Equal(t, redirecting_to, ExpandShortUrl(srvr.URL)) } diff --git a/scraper/poll_test.go b/scraper/poll_test.go index 833ac22..c4dd34f 100644 --- a/scraper/poll_test.go +++ b/scraper/poll_test.go @@ -5,96 +5,63 @@ import ( "io/ioutil" "encoding/json" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestParsePoll2Choices(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } + require.NoError(t, err) - poll := scraper.ParseAPIPoll(apiCard) - if poll.ID != 1457419248461131776 { - t.Errorf("Expected ID %d, got %d", 1457419248461131776, poll.ID) - } - if poll.NumChoices != 2 { - t.Errorf("Expected %d choices, got %d", 2, poll.NumChoices) - } - if poll.VotingDuration != 60 * 60 * 24 { - t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration) - } - expected_ending := int64(1636397201) - if poll.VotingEndsAt.Unix() != expected_ending { - t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix()) - } - expected_last_updated := int64(1636318755) - if poll.LastUpdatedAt.Unix() != expected_last_updated { - t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix()) - } - if expected_last_updated > expected_ending { - t.Errorf("Last updated should be before poll closes!") - } + poll := ParseAPIPoll(apiCard) + assert.Equal(PollID(1457419248461131776), poll.ID) + assert.Equal(2, poll.NumChoices) + assert.Equal(60 * 60 * 24, poll.VotingDuration) + assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix()) + assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix()) - if poll.Choice1 != "Yes" || poll.Choice2 != "No" { - t.Errorf("Expected %q and %q, got %q and %q", "Yes", "No", poll.Choice1, poll.Choice2) - } - if poll.Choice1_Votes != 529 { - t.Errorf("Expected %d votes for choice 1, got %d", 529, poll.Choice1_Votes) - } - if poll.Choice2_Votes != 2182 { - t.Errorf("Expected %d votes for choice 2, got %d", 2182, poll.Choice2_Votes) - } + assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) + assert.Equal("Yes", poll.Choice1) + assert.Equal("No", poll.Choice2) + assert.Equal(529, poll.Choice1_Votes) + assert.Equal(2182, poll.Choice2_Votes) } func TestParsePoll4Choices(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } + require.NoError(t, err) - poll := scraper.ParseAPIPoll(apiCard) - if poll.ID != 1455611588854140929 { - t.Errorf("Expected ID %d, got %d", 1455611588854140929, poll.ID) - } - if poll.NumChoices != 4 { - t.Errorf("Expected %d choices, got %d", 4, poll.NumChoices) - } - if poll.VotingDuration != 60 * 60 * 24 { - t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration) - } - expected_ending := int64(1635966221) - if poll.VotingEndsAt.Unix() != expected_ending { - t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix()) - } - expected_last_updated := int64(1635966226) - if poll.LastUpdatedAt.Unix() != expected_last_updated { - t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix()) - } - if expected_last_updated < expected_ending { - t.Errorf("Last updated should be after poll closes!") - } + poll := ParseAPIPoll(apiCard) + assert.Equal(PollID(1455611588854140929), poll.ID) + assert.Equal(4, poll.NumChoices) + assert.Equal(60 * 60 * 24, poll.VotingDuration) + assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix()) + assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix()) + assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) - if poll.Choice1 != "Alec Baldwin" || poll.Choice1_Votes != 1669 { - t.Errorf("Expected %q with %d, got %q with %d", "Alec Baldwin", 1669, poll.Choice1, poll.Choice1_Votes) - } - if poll.Choice2 != "Andew Cuomo" || poll.Choice2_Votes != 272 { - t.Errorf("Expected %q with %d, got %q with %d", "Andew Cuomo", 272, poll.Choice2, poll.Choice2_Votes) - } - if poll.Choice3 != "George Floyd" || poll.Choice3_Votes != 829 { - t.Errorf("Expected %q with %d, got %q with %d", "George Floyd", 829, poll.Choice3, poll.Choice3_Votes) - } - if poll.Choice4 != "Derek Chauvin" || poll.Choice4_Votes != 2397 { - t.Errorf("Expected %q with %d, got %q with %d", "Derek Chauvin", 2397, poll.Choice4, poll.Choice4_Votes) - } + assert.Equal("Alec Baldwin", poll.Choice1) + assert.Equal(1669, poll.Choice1_Votes) + + assert.Equal("Andew Cuomo", poll.Choice2) + assert.Equal(272, poll.Choice2_Votes) + + assert.Equal("George Floyd", poll.Choice3) + assert.Equal(829, poll.Choice3_Votes) + + assert.Equal("Derek Chauvin", poll.Choice4) + assert.Equal(2397, poll.Choice4_Votes) } diff --git a/scraper/retweet_test.go b/scraper/retweet_test.go index 616b576..db5317a 100644 --- a/scraper/retweet_test.go +++ b/scraper/retweet_test.go @@ -5,38 +5,27 @@ import ( "io/ioutil" "testing" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestParseSingleRetweet(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_that_is_a_retweet.json") if err != nil { panic(err) } - var api_tweet scraper.APITweet + var api_tweet APITweet err = json.Unmarshal(data, &api_tweet) - if err != nil { - t.Errorf(err.Error()) - } + require.NoError(t, err) - retweet, err := scraper.ParseSingleRetweet(api_tweet) - if err != nil { - t.Errorf(err.Error()) - } - expected_id := 1404270043018448896 - if retweet.RetweetID != scraper.TweetID(1404270043018448896) { - t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetID) - } - expected_id = 1404269989646028804 - if retweet.TweetID != scraper.TweetID(expected_id) { - t.Errorf("Expected %d, got %d", expected_id, retweet.TweetID) - } - expected_id = 44067298 - if retweet.RetweetedByID != scraper.UserID(expected_id) { - t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedByID) - } - expected_id = 1623639042 - if retweet.RetweetedAt.Unix() != int64(expected_id) { - t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedAt.Unix()) - } + retweet, err := ParseSingleRetweet(api_tweet) + require.NoError(t, err) + + assert.Equal(TweetID(1404270043018448896), retweet.RetweetID) + assert.Equal(TweetID(1404269989646028804), retweet.TweetID) + assert.Equal(UserID(44067298), retweet.RetweetedByID) + assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix()) } diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go index 896279f..98bff58 100644 --- a/scraper/tweet_test.go +++ b/scraper/tweet_test.go @@ -5,20 +5,23 @@ import ( "io/ioutil" "testing" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) -func load_tweet_from_file(filename string) scraper.Tweet{ +func load_tweet_from_file(filename string) Tweet{ data, err := ioutil.ReadFile(filename) if err != nil { panic(err) } - var apitweet scraper.APITweet + var apitweet APITweet err = json.Unmarshal(data, &apitweet) if err != nil { panic(err) } - tweet, err := scraper.ParseSingleTweet(apitweet) + tweet, err := ParseSingleTweet(apitweet) if err != nil { panic(err) } @@ -27,300 +30,160 @@ func load_tweet_from_file(filename string) scraper.Tweet{ func TestParseSingleTweet(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json") - expected_text := "The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the largest white pill I’ve swallowed in years." - actual_text := tweet.Text - - if actual_text != expected_text { - t.Errorf("Expected: %q; got %q", expected_text, actual_text) - } - - if len(tweet.Mentions) != 1 || tweet.Mentions[0] != "michaelmalice" { - t.Errorf("Expected %v, got %v", []string{"michaelmalice"}, tweet.Mentions) - } - - if len(tweet.Urls) != 0 { - t.Errorf("Expected %d urls, but got %d", 0, len(tweet.Urls)) - } - - if tweet.PostedAt.Unix() != 1621639105 { - t.Errorf("Expected %d, got %d", 1621639105, tweet.PostedAt.Unix()) - } - - if tweet.QuotedTweetID != 0 { - t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", tweet.QuotedTweetID) - } - - if len(tweet.Polls) != 0 { - t.Errorf("Should not have any polls") - } + assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the largest white pill I’ve swallowed in years.", tweet.Text) + assert.Len(tweet.Mentions, 1) + assert.Contains(tweet.Mentions, UserHandle("michaelmalice")) + assert.Empty(tweet.Urls) + assert.Equal(int64(1621639105), tweet.PostedAt.Unix()) + assert.Zero(tweet.QuotedTweetID) + assert.Empty(tweet.Polls) } func TestParseTweetWithImage(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json") - expected_text := "this saddens me every time" - if tweet.Text != expected_text { - t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text) - } - if len(tweet.Images) != 1 { - t.Errorf("Expected 1 images but got %d", len(tweet.Images)) - } + assert.Equal("this saddens me every time", tweet.Text) + assert.Len(tweet.Images, 1) } func TestParseTweetWithQuotedTweetAsLink(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json") - expected_text := "sometimes they're too dimwitted to even get the wrong title right" - if tweet.Text != expected_text { - t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text) - } - - expected_replied_id := scraper.TweetID(1395882872729477131) - if tweet.InReplyToID != expected_replied_id { - t.Errorf("Expected %q, got %q", expected_replied_id, tweet.InReplyToID) - } - if len(tweet.ReplyMentions) != 0 { - t.Errorf("Wanted %v, got %v", []string{}, tweet.ReplyMentions) - } - - expected_quoted_id := scraper.TweetID(1396194494710788100) - if tweet.QuotedTweetID != expected_quoted_id { - t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID) - } - - if len(tweet.Polls) != 0 { - t.Errorf("Should not have any polls") - } + assert.Equal("sometimes they're too dimwitted to even get the wrong title right", tweet.Text) + assert.Equal(TweetID(1395882872729477131), tweet.InReplyToID) + assert.Equal(TweetID(1396194494710788100), tweet.QuotedTweetID) + assert.Empty(tweet.ReplyMentions) + assert.Empty(tweet.Polls) } func TestParseTweetWithVideo(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json") + assert.Empty(tweet.Images) + assert.Len(tweet.Videos, 1) - expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12" - if len(tweet.Videos) != 1 || tweet.Videos[0].RemoteURL != expected_video { - t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos) - } - if tweet.Videos[0].IsGif != false { - t.Errorf("Expected it to be a regular video, but it was a gif") - } - - if len(tweet.Images) != 0 { - t.Errorf("Should not have any images, but has %d", len(tweet.Images)) - } + v := tweet.Videos[0] + assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", v.RemoteURL) + assert.False(v.IsGif) } func TestParseTweetWithGif(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json") + assert.Len(tweet.Videos, 1) - expected_video := "https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4" - if len(tweet.Videos) != 1 { - t.Errorf("Expected 1 video (a gif), but got %d instead", len(tweet.Videos)) - } - if tweet.Videos[0].RemoteURL != expected_video { - t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos) - } - if tweet.Videos[0].IsGif != true { - t.Errorf("Expected video to be a gif, but it wasn't") - } + v := tweet.Videos[0] + assert.Equal("https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4", v.RemoteURL) + assert.True(v.IsGif) } func TestParseTweetWithUrl(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json") + assert.Empty(tweet.Polls) + assert.Len(tweet.Urls, 1) - if len(tweet.Urls) != 1 { - t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls)) - } - - expected_url_text := "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/" - if tweet.Urls[0].Text != expected_url_text { - t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text) - } - if !tweet.Urls[0].HasCard { - t.Errorf("Expected it to have a card, but it doesn't") - } - expected_url_domain := "reason.com" - if tweet.Urls[0].Domain != expected_url_domain { - t.Errorf("Expected Url text to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain) - } - - if len(tweet.Polls) != 0 { - t.Errorf("Should not have any polls") - } + u := tweet.Urls[0] + assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text) + assert.True(u.HasCard) + assert.Equal("reason.com", u.Domain) } func TestParseTweetWithUrlButNoCard(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json") + assert.Len(tweet.Urls, 1) - if len(tweet.Urls) != 1 { - t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls)) - } - - expected_url_text := "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364" - if tweet.Urls[0].Text != expected_url_text { - t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text) - } - if tweet.Urls[0].HasCard { - t.Errorf("Expected url not to have a card, but it thinks it has one") - } + u := tweet.Urls[0] + assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text) + assert.False(u.HasCard) } func TestParseTweetWithMultipleUrls(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json") + assert.Empty(tweet.Polls) + assert.Len(tweet.Urls, 3) - if len(tweet.Urls) != 3 { - t.Errorf("Expected %d urls, got %d instead", 3, len(tweet.Urls)) - } - if tweet.Urls[0].HasCard { - t.Errorf("Expected url not to have a card, but it does: %d", 0) - } - if tweet.Urls[1].HasCard { - t.Errorf("Expected url not to have a card, but it does: %d", 1) - } - if !tweet.Urls[2].HasCard { - t.Errorf("Expected url to have a card, but it doesn't: %d", 2) - } - expected_title := "Biden’s victory came from the suburbs" - if tweet.Urls[2].Title != expected_title { - t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title) - } + assert.False(tweet.Urls[0].HasCard) + assert.False(tweet.Urls[1].HasCard) + assert.True (tweet.Urls[2].HasCard) - if len(tweet.Polls) != 0 { - t.Errorf("Should not have any polls") - } + assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title) } func TestTweetWithLotsOfReplyMentions(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json") + assert.Len(tweet.ReplyMentions, 4) - if len(tweet.ReplyMentions) != 4 { - t.Errorf("Expected %d reply-mentions, got %d", 4, len(tweet.ReplyMentions)) - } - for i, v := range []scraper.UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} { - if tweet.ReplyMentions[i] != v { - t.Errorf("Expected %q, got %q at position %d", v, tweet.ReplyMentions[i], i) - } + for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} { + assert.Equal(v, tweet.ReplyMentions[i]) } } func TestTweetWithPoll(t *testing.T) { + assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json") + assert.Len(tweet.Polls, 1) - if len(tweet.Polls) != 1 { - t.Fatalf("Expected there to be 1 poll, but there was %d", len(tweet.Polls)) - } p := tweet.Polls[0] - - if p.TweetID != tweet.ID { - t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", p.TweetID, tweet.ID) - } - if p.NumChoices != 4 { - t.Errorf("Expected %d choices, got %d instead", 4, p.NumChoices) - } - expected_choice1 := "Tribal armband" - if p.Choice1 != expected_choice1 { - t.Errorf("Expected choice1 %q, got %q", expected_choice1, p.Choice1) - } - expected_choice2 := "Marijuana leaf" - if p.Choice2 != expected_choice2 { - t.Errorf("Expected choice2 %q, got %q", expected_choice2, p.Choice2) - } - expected_choice3 := "Butterfly" - if p.Choice3 != expected_choice3 { - t.Errorf("Expected choice3 %q, got %q", expected_choice3, p.Choice3) - } - expected_choice4 := "Maple leaf" - if p.Choice4 != expected_choice4 { - t.Errorf("Expected choice4 %q, got %q", expected_choice4, p.Choice4) - } - - expected_votes1 := 1593 - expected_votes2 := 624 - expected_votes3 := 778 - expected_votes4 := 1138 - if p.Choice1_Votes != expected_votes1 { - t.Errorf("Expected Choice1_Votes %d, got %d", expected_votes1, p.Choice1_Votes) - } - if p.Choice2_Votes != expected_votes2 { - t.Errorf("Expected Choice2_Votes %d, got %d", expected_votes2, p.Choice2_Votes) - } - if p.Choice3_Votes != expected_votes3 { - t.Errorf("Expected Choice3_Votes %d, got %d", expected_votes3, p.Choice3_Votes) - } - if p.Choice4_Votes != expected_votes4 { - t.Errorf("Expected Choice4_Votes %d, got %d", expected_votes4, p.Choice4_Votes) - } - - expected_duration := 1440 * 60 - if p.VotingDuration != expected_duration { - t.Errorf("Expected voting duration %d seconds, got %d", expected_duration, p.VotingDuration) - } - expected_ends_at := int64(1638331934) - if p.VotingEndsAt.Unix() != expected_ends_at { - t.Errorf("Expected voting ends at %d (unix), got %d", expected_ends_at, p.VotingEndsAt.Unix()) - } - expected_last_updated_at := int64(1638331935) - if p.LastUpdatedAt.Unix() != expected_last_updated_at { - t.Errorf("Expected updated %d, got %d", expected_last_updated_at, p.LastUpdatedAt.Unix()) - } + assert.Equal(tweet.ID, p.TweetID) + assert.Equal(4, p.NumChoices) + assert.Equal("Tribal armband", p.Choice1) + assert.Equal("Marijuana leaf", p.Choice2) + assert.Equal("Butterfly", p.Choice3) + assert.Equal("Maple leaf", p.Choice4) + assert.Equal(1593, p.Choice1_Votes) + assert.Equal(624, p.Choice2_Votes) + assert.Equal(778, p.Choice3_Votes) + assert.Equal(1138, p.Choice4_Votes) + assert.Equal(1440 * 60, p.VotingDuration) + assert.Equal(int64(1638331934), p.VotingEndsAt.Unix()) + assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix()) } func TestParseTweetResponse(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Errorf(err.Error()) - } + require.NoError(t, err) - tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp) - if err != nil { - t.Errorf(err.Error()) - } + tweets, retweets, users, err := ParseTweetResponse(tweet_resp) + require.NoError(t, err) - if len(tweets) != 29 - 3 { - t.Errorf("Expected %d tweets, got %d", 29-3, len(tweets)) - } - if len(retweets) != 3 { - t.Errorf("Expected %d retweets, got %d", 3, len(retweets)) - } - if len(users) != 9 { - t.Errorf("Expected %d users, got %d", 9, len(users)) - } + assert.Len(tweets, 29 - 3) + assert.Len(retweets, 3) + assert.Len(users, 9) } func TestParseTweetResponseWithTombstones(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json") if err != nil { panic(err) } - var tweet_resp scraper.TweetResponse + var tweet_resp TweetResponse err = json.Unmarshal(data, &tweet_resp) - if err != nil { - t.Errorf(err.Error()) - } + require.NoError(t, err) + extra_users := tweet_resp.HandleTombstones() - if len(extra_users) != 1 { - t.Errorf("Expected to need 1 extra user but got %d instead", len(extra_users)) - } + assert.Len(extra_users, 1) - tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp) - if err != nil { - t.Fatal(err) - } + tweets, retweets, users, err := ParseTweetResponse(tweet_resp) + require.NoError(t, err) - if len(tweets) != 2 { - t.Errorf("Expected %d tweets, got %d", 2, len(tweets)) - } - if len(retweets) != 0 { - t.Errorf("Expected %d retweets, got %d", 0, len(retweets)) - } - if len(users) != 1 { - t.Errorf("Expected %d users, got %d", 1, len(users)) - } + assert.Len(tweets, 2) + assert.Len(retweets, 0) + assert.Len(users, 1) } diff --git a/scraper/url_test.go b/scraper/url_test.go index 48971c4..18f3f48 100644 --- a/scraper/url_test.go +++ b/scraper/url_test.go @@ -5,182 +5,92 @@ import ( "io/ioutil" "encoding/json" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestParseAPIUrlCard(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/url_card.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } - url := scraper.ParseAPIUrlCard(apiCard) + require.NoError(t, err) - expected_domain := "reason.com" - if url.Domain != expected_domain { - t.Errorf("Expected %q, got %q", expected_domain, url.Domain) - } - expected_title := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'" - if url.Title != expected_title { - t.Errorf("Expected %q, got %q", expected_title, url.Title) - } - expected_description := "\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"" - if url.Description != expected_description { - t.Errorf("Expected %q, got %q", expected_description, url.Description) - } - expected_width := 600 - if url.ThumbnailWidth != expected_width { - t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth) - } - expected_height := 315 - if url.ThumbnailHeight != expected_height { - t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight) - } - expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600" - if url.ThumbnailRemoteUrl != expected_remote_url { - t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) - } - expected_local_filename := "odDi9EqO_600x600.jpg" - if url.ThumbnailLocalPath != expected_local_filename { - t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath) - } - expected_creator_id := scraper.UserID(155581583) - if url.CreatorID != expected_creator_id { - t.Errorf("Expected %d, got %d", expected_creator_id, url.CreatorID) - } - expected_site_id := scraper.UserID(16467567) - if url.SiteID != expected_site_id { - t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID) - } - if !url.HasThumbnail { - t.Errorf("Should have a thumbnail, but it doesn't") - } - if url.IsContentDownloaded { - t.Errorf("Expected it not to be downloaded, but it was") - } + url := ParseAPIUrlCard(apiCard) + assert.Equal("reason.com", url.Domain) + assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title) + assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"", url.Description) + assert.Equal(600, url.ThumbnailWidth) + assert.Equal(315, url.ThumbnailHeight) + assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl) + assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath) + assert.Equal(UserID(155581583), url.CreatorID) + assert.Equal(UserID(16467567), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithPlayer(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } - url := scraper.ParseAPIUrlCard(apiCard) + require.NoError(t, err) - expected_domain := "www.youtube.com" - if url.Domain != expected_domain { - t.Errorf("Expected %q, got %q", expected_domain, url.Domain) - } - expected_title := "The Politically Incorrect Guide to the Constitution (Starring Tom..." - if url.Title != expected_title { - t.Errorf("Expected %q, got %q", expected_title, url.Title) - } - expected_description := "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..." - if url.Description != expected_description { - t.Errorf("Expected %q, got %q", expected_description, url.Description) - } - expected_remote_url := "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1" - if url.ThumbnailRemoteUrl != expected_remote_url { - t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) - } - expected_local_filename := "_1t0btyt_800x320_1.jpg" - if url.ThumbnailLocalPath != expected_local_filename { - t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath) - } - expected_site_id := scraper.UserID(10228272) - if url.SiteID != expected_site_id { - t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID) - } - if !url.HasThumbnail { - t.Errorf("Should have a thumbnail, but it doesn't") - } - if url.IsContentDownloaded { - t.Errorf("Expected it not to be downloaded, but it was") - } + url := ParseAPIUrlCard(apiCard) + assert.Equal("www.youtube.com", url.Domain) + assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title) + assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble...", url.Description) + assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl) + assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath) + assert.Equal(UserID(10228272), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } - url := scraper.ParseAPIUrlCard(apiCard) + require.NoError(t, err) - expected_domain := "www.youtube.com" - if url.Domain != expected_domain { - t.Errorf("Expected %q, got %q", expected_domain, url.Domain) - } - expected_title := "Did Michael Malice Turn Me into an Anarchist? | Ep 181" - if url.Title != expected_title { - t.Errorf("Expected %q, got %q", expected_title, url.Title) - } - expected_description := "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..." - if url.Description != expected_description { - t.Errorf("Expected %q, got %q", expected_description, url.Description) - } - expected_remote_url := "https://pbs.twimg.com/cards/player-placeholder.png" - if url.ThumbnailRemoteUrl != expected_remote_url { - t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl) - } - expected_local_filename := "player-placeholder.png" - if url.ThumbnailLocalPath != expected_local_filename { - t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath) - } - expected_site_id := scraper.UserID(10228272) - if url.SiteID != expected_site_id { - t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID) - } - if !url.HasThumbnail { - t.Errorf("Should have a thumbnail, but it doesn't") - } - if url.IsContentDownloaded { - t.Errorf("Expected it not to be downloaded, but it was") - } + url := ParseAPIUrlCard(apiCard) + assert.Equal("www.youtube.com", url.Domain) + assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title) + assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________...", url.Description) + assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl) + assert.Equal("player-placeholder.png", url.ThumbnailLocalPath) + assert.Equal(UserID(10228272), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") if err != nil { panic(err) } - var apiCard scraper.APICard + var apiCard APICard err = json.Unmarshal(data, &apiCard) - if err != nil { - t.Fatal(err.Error()) - } - url := scraper.ParseAPIUrlCard(apiCard) + require.NoError(t, err) - expected_domain := "en.m.wikipedia.org" - if url.Domain != expected_domain { - t.Errorf("Expected %q, got %q", expected_domain, url.Domain) - } - expected_title := "Entryism - Wikipedia" - if url.Title != expected_title { - t.Errorf("Expected %q, got %q", expected_title, url.Title) - } - expected_description := "" - if url.Description != expected_description { - t.Errorf("Expected %q, got %q", expected_description, url.Description) - } - - if !url.HasCard { - t.Errorf("Expected it to have a card, but it didn't") - } - if url.HasThumbnail { - t.Errorf("Should have no thumbnail, but it does") - } + url := ParseAPIUrlCard(apiCard) + assert.Equal("en.m.wikipedia.org", url.Domain) + assert.Equal("Entryism - Wikipedia", url.Title) + assert.Equal("", url.Description) + assert.True(url.HasCard) + assert.False(url.HasThumbnail) } diff --git a/scraper/user_test.go b/scraper/user_test.go index f18a62d..623f146 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -7,162 +7,96 @@ import ( "net/http" "github.com/jarcoal/httpmock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - "offline_twitter/scraper" + . "offline_twitter/scraper" ) func TestParseSingleUser(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json") if err != nil { panic(err) } - var user_resp scraper.UserResponse + var user_resp UserResponse err = json.Unmarshal(data, &user_resp) - if err != nil { - t.Errorf(err.Error()) - } + require.NoError(t, err) + apiUser := user_resp.ConvertToAPIUser() - user, err := scraper.ParseSingleUser(apiUser) - if err != nil { - t.Errorf(err.Error()) - } + user, err := ParseSingleUser(apiUser) + require.NoError(t, err) - expected_id := 44067298 - if user.ID != scraper.UserID(expected_id) { - t.Errorf("Expected %q, got %q", expected_id, user.ID) - } - if user.DisplayName != "Michael Malice" { - t.Errorf("Expected %q, got %q", "Michael Malice", user.DisplayName) - } - if user.Handle != "michaelmalice" { - t.Errorf("Expected %q, got %q", "michaelmalice", user.Handle) - } - expectedBio := "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑" - if user.Bio != expectedBio { - t.Errorf("Expected %q, got %q", expectedBio, user.Bio) - } - if user.FollowingCount != 941 { - t.Errorf("Expected %d, got %d", 941, user.FollowingCount) - } - if user.FollowersCount != 208589 { - t.Errorf("Expected %d, got %d", 941, user.FollowersCount) - } - if user.Location != "Brooklyn" { - t.Errorf("Expected %q, got %q", "Brooklyn", user.Location) - } - if user.Website != "https://amzn.to/3oInafv" { - t.Errorf("Expected %q, got %q", "https://amzn.to/3oInafv", user.Website) - } - if user.JoinDate.Unix() != 1243920952 { - t.Errorf("Expected %d, got %d", 1243920952, user.JoinDate.Unix()) - } - if user.IsPrivate != false { - t.Errorf("Expected %v, got %v", false, user.IsPrivate) - } - if user.IsVerified != true { - t.Errorf("Expected %v, got %v", true, user.IsPrivate) - } - if user.IsBanned != false { - t.Errorf("User should not be banned") - } - expectedProfileImage := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg" - if user.ProfileImageUrl != expectedProfileImage { - t.Errorf("Expected %q, got %q", expectedProfileImage, user.ProfileImageUrl) - } - expected_tiny_profile_image := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg" - if user.GetTinyProfileImageUrl() != expected_tiny_profile_image { - t.Errorf("Expected %q, got %q", expected_tiny_profile_image, user.GetTinyProfileImageUrl()) - } - expectedBannerImage := "https://pbs.twimg.com/profile_banners/44067298/1615134676" - if user.BannerImageUrl != expectedBannerImage { - t.Errorf("Expected %q, got %q", expectedBannerImage, user.BannerImageUrl) - } - expected_profile_image_local := "michaelmalice_profile_Lbwdb_C9.jpg" - if user.ProfileImageLocalPath != expected_profile_image_local { - t.Errorf("Expected %q, got %q", expected_profile_image_local, user.ProfileImageLocalPath) - } - expected_banner_image_local := "michaelmalice_banner_1615134676.jpg" - if user.BannerImageLocalPath != expected_banner_image_local { - t.Errorf("Expected %q, got %q", expected_banner_image_local, user.BannerImageLocalPath) - } - expected_id = 1403835414373339136 - if user.PinnedTweetID != scraper.TweetID(expected_id) { - t.Errorf("Expected %q, got %q", expected_id, user.PinnedTweet) - } + assert.Equal(UserID(44067298), user.ID) + assert.Equal("Michael Malice", user.DisplayName) + assert.Equal(UserHandle("michaelmalice"), user.Handle) + assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio) + assert.Equal(941, user.FollowingCount) + assert.Equal(208589, user.FollowersCount) + assert.Equal("Brooklyn", user.Location) + assert.Equal("https://amzn.to/3oInafv", user.Website) + assert.Equal(int64(1243920952), user.JoinDate.Unix()) + assert.False(user.IsPrivate) + assert.True (user.IsVerified) + assert.False(user.IsBanned) + assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl) + assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl()) + assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1615134676", user.BannerImageUrl) + assert.Equal("michaelmalice_profile_Lbwdb_C9.jpg", user.ProfileImageLocalPath) + assert.Equal("michaelmalice_banner_1615134676.jpg", user.BannerImageLocalPath) + assert.Equal(TweetID(1403835414373339136), user.PinnedTweetID) } /** * Should correctly parse a banned user */ func TestParseBannedUser(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/suspended_user.json") if err != nil { panic(err) } - var user_resp scraper.UserResponse + var user_resp UserResponse err = json.Unmarshal(data, &user_resp) - if err != nil { - t.Fatalf(err.Error()) - } + require.NoError(t, err) + apiUser := user_resp.ConvertToAPIUser() - user, err := scraper.ParseSingleUser(apiUser) - if err != nil { - t.Fatalf(err.Error()) - } - - if user.ID != 193918550 { - t.Errorf("Expected id %d, got %d", 193918550, user.ID) - } - if user.IsBanned != true { - t.Errorf("Expected user to be banned") - } + user, err := ParseSingleUser(apiUser) + require.NoError(t, err) + assert.Equal(UserID(193918550), user.ID) + assert.True(user.IsBanned) // Test generation of profile images for banned user - if user.GetTinyProfileImageUrl() != "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" { - t.Errorf("Incorrect tiny profile image URL for banned user: %q", user.GetTinyProfileImageUrl()) - } - if user.GetTinyProfileImageLocalPath() != "default_profile.png" { - t.Errorf("Incorrect tiny profile image local path for banned user: %q", user.GetTinyProfileImageLocalPath()) - } + assert.Equal("https://abs.twimg.com/sticky/default_profile_images/default_profile.png", user.GetTinyProfileImageUrl()) + assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath()) } /** * Should extract a user handle from a tweet URL, or fail if URL is invalid */ func TestParseHandleFromTweetUrl(t *testing.T) { + assert := assert.New(t) + // Test valid tweet url url := "https://twitter.com/kanesays23/status/1429583672827465730" - expected_user := scraper.UserHandle("kanesays23") - result, err := scraper.ParseHandleFromTweetUrl(url) - if err != nil { - t.Errorf("Unexpected error while parsing handle: %s", err) - } - if result != expected_user { - t.Errorf("Expected handle %q, got %q", expected_user, result) - } + result, err := ParseHandleFromTweetUrl(url) + assert.NoError(err) + assert.Equal(UserHandle("kanesays23"), result) // Test url with GET params - result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") - expected_user = scraper.UserHandle("NerdNoticing") - if err != nil { - t.Errorf("Unexpected error while parsing handle: %s", err) - } - if result != expected_user { - t.Errorf("Expected handle %q, got %q", expected_user, result) - } + result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") + assert.NoError(err) + assert.Equal(UserHandle("NerdNoticing"), result) + // Test invalid url - _, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") - if err == nil { - t.Errorf("Should have produced an error for invalid URL") - } + _, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") + assert.Error(err) // Test empty string - _, err = scraper.ParseHandleFromTweetUrl("") - if err == nil { - t.Errorf("Should have produced an error for invalid URL") - } + _, err = ParseHandleFromTweetUrl("") + assert.Error(err) } @@ -170,6 +104,8 @@ func TestParseHandleFromTweetUrl(t *testing.T) { * Should extract a user handle from a shortened tweet URL */ func TestParseHandleFromShortenedTweetUrl(t *testing.T) { + assert := assert.New(t) + short_url := "https://t.co/rZVrNGJyDe" expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529" @@ -183,16 +119,9 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) { }) // Check the httmock interceptor is working correctly - if scraper.ExpandShortUrl(short_url) != expanded_url { - t.Fatalf("httpmock didn't intercept the request") - } + require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request") - result, err := scraper.ParseHandleFromTweetUrl(short_url) - if err != nil { - t.Errorf(err.Error()) - } - expected_user := scraper.UserHandle("MarkSnyderJr1") - if result != expected_user { - t.Errorf("Expected user %q, got %q", expected_user, result) - } + result, err := ParseHandleFromTweetUrl(short_url) + require.NoError(t, err) + assert.Equal(UserHandle("MarkSnyderJr1"), result) } diff --git a/scraper/video_test.go b/scraper/video_test.go index dc261a6..bb20919 100644 --- a/scraper/video_test.go +++ b/scraper/video_test.go @@ -5,65 +5,33 @@ import ( "io/ioutil" "encoding/json" - "offline_twitter/scraper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + . "offline_twitter/scraper" ) func TestParseAPIVideo(t *testing.T) { + assert := assert.New(t) data, err := ioutil.ReadFile("test_responses/tweet_content/video.json") if err != nil { panic(err) } - var apivideo scraper.APIExtendedMedia + var apivideo APIExtendedMedia err = json.Unmarshal(data, &apivideo) - if err != nil { - t.Fatal(err.Error()) - } - tweet_id := scraper.TweetID(28) - video := scraper.ParseAPIVideo(apivideo, tweet_id) + require.NoError(t, err) - expected_id := 1418951950020845568 - if video.ID != scraper.VideoID(expected_id) { - t.Errorf("Expected ID of %d, got %d", expected_id, video.ID) - } - if video.TweetID != tweet_id { - t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID) - } - - expected_height := 1280 - if video.Height != expected_height { - t.Errorf("Expected height %d, got %d", expected_height, video.Height) - } - expected_width := 720 - if video.Width != expected_width { - t.Errorf("Expected width %d, got %d", expected_width, video.Width) - } - expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12" - if video.RemoteURL != expected_remote_url { - t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL) - } - expected_local_filename := "28.mp4" - if video.LocalFilename != expected_local_filename { - t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename) - } - expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg" - if video.ThumbnailRemoteUrl != expected_thumbnail_url { - t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl) - } - expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg" - if video.ThumbnailLocalPath != expected_thumbnail_filename { - t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath) - } - expected_view_count := 275952 - if video.ViewCount != expected_view_count { - t.Errorf("Expected view count %d, got %d", expected_view_count, video.ViewCount) - } - expected_duration := 88300 - if video.Duration != expected_duration { - t.Errorf("Expected duration %d, got %d", expected_duration, video.Duration) - } - - - if video.IsDownloaded { - t.Errorf("Expected it not to be downloaded, but it was") - } + tweet_id := TweetID(28) + video := ParseAPIVideo(apivideo, tweet_id) + assert.Equal(VideoID(1418951950020845568), video.ID) + assert.Equal(tweet_id, video.TweetID) + assert.Equal(1280, video.Height) + assert.Equal(720, video.Width) + assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL) + assert.Equal("28.mp4", video.LocalFilename) + assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl) + assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath) + assert.Equal(275952, video.ViewCount) + assert.Equal(88300, video.Duration) + assert.False(video.IsDownloaded) }