Refactor scraper tests

- use the stretchr/testify `assert` and `require` packages for assertions
- use "dot imports" (`. "offline_twitter/scraper"`) to remove the "scraper." qualifier from every identifier in the tests
This commit is contained in:
Alessio 2022-01-31 19:14:14 -08:00
parent babbdecf6c
commit 069ab83fba
11 changed files with 362 additions and 814 deletions

View File

@ -5,17 +5,21 @@ import (
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestNormalizeContent(t *testing.T) { func TestNormalizeContent(t *testing.T) {
assert := assert.New(t)
test_cases := []struct { test_cases := []struct {
filename string filename string
eventual_full_text string eventual_full_text string
quoted_status_id scraper.TweetID quoted_status_id TweetID
in_reply_to_id scraper.TweetID in_reply_to_id TweetID
retweeted_status_id scraper.TweetID retweeted_status_id TweetID
reply_mentions string reply_mentions string
} { } {
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"}, {"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
@ -35,77 +39,53 @@ func TestNormalizeContent(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet scraper.APITweet var tweet APITweet
err = json.Unmarshal(data, &tweet) err = json.Unmarshal(data, &tweet)
if err != nil { assert.NoError(err, "Failed at " + v.filename)
println("Failed at " + v.filename)
t.Errorf(err.Error())
}
tweet.NormalizeContent() tweet.NormalizeContent()
if tweet.FullText != v.eventual_full_text { assert.Equal(v.eventual_full_text, tweet.FullText, "Tweet text")
t.Errorf("Expected %q, got %q", v.eventual_full_text, tweet.FullText) assert.Equal(int64(v.quoted_status_id), tweet.QuotedStatusID, "Quoted status ID")
} assert.Equal(int64(v.in_reply_to_id), tweet.InReplyToStatusID, "In reply to ID")
if scraper.TweetID(tweet.QuotedStatusID) != v.quoted_status_id { assert.Equal(int64(v.retweeted_status_id), tweet.RetweetedStatusID, "Retweeted status ID")
t.Errorf("Expected quoted status %d, but got %d", v.quoted_status_id, tweet.QuotedStatusID) assert.Equal(v.reply_mentions, tweet.Entities.ReplyMentions, "Reply mentions")
}
if scraper.TweetID(tweet.InReplyToStatusID) != v.in_reply_to_id {
t.Errorf("Expected in_reply_to_id id %d, but got %d", v.in_reply_to_id, tweet.InReplyToStatusID)
}
if scraper.TweetID(tweet.RetweetedStatusID) != v.retweeted_status_id {
t.Errorf("Expected retweeted status id %d, but got %d", v.retweeted_status_id, tweet.RetweetedStatusID)
}
if tweet.Entities.ReplyMentions != v.reply_mentions {
t.Errorf("Expected @reply mentions to be %q, but it was %q", v.reply_mentions, tweet.Entities.ReplyMentions)
}
} }
} }
func TestUserProfileToAPIUser(t *testing.T) { func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json") data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var user_resp scraper.UserResponse var user_resp UserResponse
err = json.Unmarshal(data, &user_resp) err = json.Unmarshal(data, &user_resp)
if err != nil { assert.NoError(err)
t.Errorf(err.Error())
}
result := user_resp.ConvertToAPIUser() result := user_resp.ConvertToAPIUser()
assert.Equal(int64(44067298), result.ID)
if result.ID != 44067298 { assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
t.Errorf("Expected ID %q, got %q", 44067298, result.ID)
}
if result.FollowersCount != user_resp.Data.User.Legacy.FollowersCount {
t.Errorf("Expected user count %d, got %d", user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
}
} }
func TestGetCursor(t *testing.T) { func TestGetCursor(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/midriffs_anarchist_cookbook.json") data, err := ioutil.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { assert.NoError(err)
t.Errorf(err.Error())
}
expected_cursor := "LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=" assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=", tweet_resp.GetCursor())
actual_cursor := tweet_resp.GetCursor()
if expected_cursor != actual_cursor {
t.Errorf("Expected %q, got %q", expected_cursor, actual_cursor)
}
} }
func TestIsEndOfFeed(t *testing.T) { func TestIsEndOfFeed(t *testing.T) {
assert := assert.New(t)
test_cases := []struct { test_cases := []struct {
filename string filename string
is_end_of_feed bool is_end_of_feed bool
@ -118,130 +98,86 @@ func TestIsEndOfFeed(t *testing.T) {
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { assert.NoError(err)
t.Fatalf(err.Error()) assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed())
}
result := tweet_resp.IsEndOfFeed()
if v.is_end_of_feed != result {
t.Errorf("Expected IsEndOfFeed to be %v, but got %v", v.is_end_of_feed, result)
}
} }
} }
func TestHandleTombstonesHidden(t *testing.T) { func TestHandleTombstonesHidden(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json") data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { require.NoError(t, err)
t.Fatalf(err.Error()) assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
}
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
}
tweet_resp.HandleTombstones() tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 4 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets)) assert.Equal(4, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
}
first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"] first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"]
if !ok { if assert.True(ok, "Missing tombstone") {
t.Errorf("Missing tombstoned tweet for %s", "1454522147750260742") assert.Equal(int64(1454522147750260742), first_tombstone.ID)
} assert.Equal(int64(1365863538393309184), first_tombstone.UserID)
if first_tombstone.ID != 1454522147750260742 { assert.Equal("hidden", first_tombstone.TombstoneText)
t.Errorf("Expected ID %d, got %d instead", 1454522147750260742, first_tombstone.ID)
}
if first_tombstone.UserID != 1365863538393309184 {
t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, first_tombstone.UserID)
}
if first_tombstone.TombstoneText != "hidden" {
t.Errorf("Wrong tombstone text: %s", first_tombstone.TombstoneText)
} }
second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"] second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"]
if !ok { if assert.True(ok, "Missing tombstone") {
t.Errorf("Missing tombstoned tweet for %s", "1454515503242829830") assert.Equal(int64(1454515503242829830), second_tombstone.ID)
} assert.Equal(int64(1365863538393309184), second_tombstone.UserID)
if second_tombstone.ID != 1454515503242829830 { assert.Equal("hidden", second_tombstone.TombstoneText)
t.Errorf("Expected ID %d, got %d instead", 1454515503242829830, second_tombstone.ID)
}
if second_tombstone.UserID != 1365863538393309184 {
t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, second_tombstone.UserID)
}
if second_tombstone.TombstoneText != "hidden" {
t.Errorf("Wrong tombstone text: %s", second_tombstone.TombstoneText)
} }
} }
func TestHandleTombstonesDeleted(t *testing.T) { func TestHandleTombstonesDeleted(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json") data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { require.NoError(t, err)
t.Fatalf(err.Error()) assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
}
if len(tweet_resp.GlobalObjects.Tweets) != 1 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets))
}
tweet_resp.HandleTombstones() tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
}
tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"] tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"]
if !ok { if assert.True(ok) {
t.Errorf("Missing tombstoned tweet for %s", "1454521654781136902") assert.Equal(int64(1454521654781136902), tombstone.ID)
} assert.Equal(int64(1218687933391298560), tombstone.UserID)
if tombstone.ID != 1454521654781136902 { assert.Equal("deleted", tombstone.TombstoneText)
t.Errorf("Expected ID %d, got %d instead", 1454521654781136902, tombstone.ID)
}
if tombstone.UserID != 1218687933391298560 {
t.Errorf("Expected UserID %d, got %d instead", 1218687933391298560, tombstone.UserID)
}
if tombstone.TombstoneText != "deleted" {
t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
} }
} }
func TestHandleTombstonesUnavailable(t *testing.T) { func TestHandleTombstonesUnavailable(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_unavailable.json") data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_unavailable.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { require.NoError(t, err)
t.Fatalf(err.Error()) assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
}
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
}
tweet_resp.HandleTombstones() tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 3 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 3, len(tweet_resp.GlobalObjects.Tweets)) assert.Equal(3, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
}
tombstone, ok := tweet_resp.GlobalObjects.Tweets["1452686887651532809"] tombstone, ok := tweet_resp.GlobalObjects.Tweets["1452686887651532809"]
if !ok { if assert.True(ok) {
t.Errorf("Missing tombstoned tweet for %s", "1452686887651532809") assert.Equal(int64(1452686887651532809), tombstone.ID)
} assert.Equal(int64(1241389617502445569), tombstone.UserID)
if tombstone.ID != 1452686887651532809 { assert.Equal("unavailable", tombstone.TombstoneText)
t.Errorf("Expected ID %d, got %d instead", 1452686887651532809, tombstone.ID)
}
if tombstone.UserID != 1241389617502445569 {
t.Errorf("Expected UserID %d, got %d instead", 1241389617502445569, tombstone.UserID)
}
if tombstone.TombstoneText != "unavailable" {
t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
} }
} }

View File

@ -6,10 +6,10 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
. "offline_twitter/scraper"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
)
. "offline_twitter/scraper"
)
/** /**
* Parse an APIV2User * Parse an APIV2User

View File

@ -1,12 +1,14 @@
package scraper_test package scraper_test
import "testing" import (
import "fmt" "testing"
import "offline_twitter/scraper" "fmt"
. "offline_twitter/scraper"
)
// Makes an HTTP request // Makes an HTTP request
func TestGetGuestToken(t *testing.T) { func TestGetGuestToken(t *testing.T) {
token, err := scraper.GetGuestToken() token, err := GetGuestToken()
if err != nil { if err != nil {
t.Errorf("%v", err) t.Errorf("%v", err)
} }
@ -22,6 +24,6 @@ func TestGetGuestToken(t *testing.T) {
// other than the first use the cache. // other than the first use the cache.
func BenchmarkGetGuestToken(b *testing.B) { func BenchmarkGetGuestToken(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_, _ = scraper.GetGuestToken() _, _ = GetGuestToken()
} }
} }

View File

@ -5,42 +5,27 @@ import (
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestParseAPIMedia(t *testing.T) { func TestParseAPIMedia(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/image.json") data, err := ioutil.ReadFile("test_responses/tweet_content/image.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apimedia scraper.APIMedia var apimedia APIMedia
err = json.Unmarshal(data, &apimedia) err = json.Unmarshal(data, &apimedia)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
image := scraper.ParseAPIMedia(apimedia)
expected_id := 1395882862289772553 image := ParseAPIMedia(apimedia)
if image.ID != scraper.ImageID(expected_id) { assert.Equal(ImageID(1395882862289772553), image.ID)
t.Errorf("Expected ID of %q, got %q", expected_id, image.ID) assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
} assert.Equal(593, image.Width)
expected_remote_url := "https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg" assert.Equal(239, image.Height)
if image.RemoteURL != expected_remote_url { assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL) assert.False(image.IsDownloaded)
}
expected_width := 593
if image.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, image.Width)
}
expected_height := 239
if image.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, image.Height)
}
expected_local_filename := "E18sEUrWYAk8dBl.jpg"
if image.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename)
}
if image.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
} }

View File

@ -6,7 +6,9 @@ import (
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
. "offline_twitter/scraper"
) )
@ -18,8 +20,5 @@ func TestExpandShortUrl(t *testing.T) {
})) }))
defer srvr.Close() defer srvr.Close()
result := scraper.ExpandShortUrl(srvr.URL) assert.Equal(t, redirecting_to, ExpandShortUrl(srvr.URL))
if result != redirecting_to {
t.Errorf("Expected %q, got %q", redirecting_to, result)
}
} }

View File

@ -5,96 +5,63 @@ import (
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestParsePoll2Choices(t *testing.T) { func TestParsePoll2Choices(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json") data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
poll := scraper.ParseAPIPoll(apiCard) poll := ParseAPIPoll(apiCard)
if poll.ID != 1457419248461131776 { assert.Equal(PollID(1457419248461131776), poll.ID)
t.Errorf("Expected ID %d, got %d", 1457419248461131776, poll.ID) assert.Equal(2, poll.NumChoices)
} assert.Equal(60 * 60 * 24, poll.VotingDuration)
if poll.NumChoices != 2 { assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
t.Errorf("Expected %d choices, got %d", 2, poll.NumChoices) assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
}
if poll.VotingDuration != 60 * 60 * 24 {
t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration)
}
expected_ending := int64(1636397201)
if poll.VotingEndsAt.Unix() != expected_ending {
t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
}
expected_last_updated := int64(1636318755)
if poll.LastUpdatedAt.Unix() != expected_last_updated {
t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix())
}
if expected_last_updated > expected_ending {
t.Errorf("Last updated should be before poll closes!")
}
if poll.Choice1 != "Yes" || poll.Choice2 != "No" { assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
t.Errorf("Expected %q and %q, got %q and %q", "Yes", "No", poll.Choice1, poll.Choice2) assert.Equal("Yes", poll.Choice1)
} assert.Equal("No", poll.Choice2)
if poll.Choice1_Votes != 529 { assert.Equal(529, poll.Choice1_Votes)
t.Errorf("Expected %d votes for choice 1, got %d", 529, poll.Choice1_Votes) assert.Equal(2182, poll.Choice2_Votes)
}
if poll.Choice2_Votes != 2182 {
t.Errorf("Expected %d votes for choice 2, got %d", 2182, poll.Choice2_Votes)
}
} }
func TestParsePoll4Choices(t *testing.T) { func TestParsePoll4Choices(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json") data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
poll := scraper.ParseAPIPoll(apiCard) poll := ParseAPIPoll(apiCard)
if poll.ID != 1455611588854140929 { assert.Equal(PollID(1455611588854140929), poll.ID)
t.Errorf("Expected ID %d, got %d", 1455611588854140929, poll.ID) assert.Equal(4, poll.NumChoices)
} assert.Equal(60 * 60 * 24, poll.VotingDuration)
if poll.NumChoices != 4 { assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
t.Errorf("Expected %d choices, got %d", 4, poll.NumChoices) assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
} assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
if poll.VotingDuration != 60 * 60 * 24 {
t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration)
}
expected_ending := int64(1635966221)
if poll.VotingEndsAt.Unix() != expected_ending {
t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
}
expected_last_updated := int64(1635966226)
if poll.LastUpdatedAt.Unix() != expected_last_updated {
t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix())
}
if expected_last_updated < expected_ending {
t.Errorf("Last updated should be after poll closes!")
}
if poll.Choice1 != "Alec Baldwin" || poll.Choice1_Votes != 1669 { assert.Equal("Alec Baldwin", poll.Choice1)
t.Errorf("Expected %q with %d, got %q with %d", "Alec Baldwin", 1669, poll.Choice1, poll.Choice1_Votes) assert.Equal(1669, poll.Choice1_Votes)
}
if poll.Choice2 != "Andew Cuomo" || poll.Choice2_Votes != 272 { assert.Equal("Andew Cuomo", poll.Choice2)
t.Errorf("Expected %q with %d, got %q with %d", "Andew Cuomo", 272, poll.Choice2, poll.Choice2_Votes) assert.Equal(272, poll.Choice2_Votes)
}
if poll.Choice3 != "George Floyd" || poll.Choice3_Votes != 829 { assert.Equal("George Floyd", poll.Choice3)
t.Errorf("Expected %q with %d, got %q with %d", "George Floyd", 829, poll.Choice3, poll.Choice3_Votes) assert.Equal(829, poll.Choice3_Votes)
}
if poll.Choice4 != "Derek Chauvin" || poll.Choice4_Votes != 2397 { assert.Equal("Derek Chauvin", poll.Choice4)
t.Errorf("Expected %q with %d, got %q with %d", "Derek Chauvin", 2397, poll.Choice4, poll.Choice4_Votes) assert.Equal(2397, poll.Choice4_Votes)
}
} }

View File

@ -5,38 +5,27 @@ import (
"io/ioutil" "io/ioutil"
"testing" "testing"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestParseSingleRetweet(t *testing.T) { func TestParseSingleRetweet(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_that_is_a_retweet.json") data, err := ioutil.ReadFile("test_responses/tweet_that_is_a_retweet.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var api_tweet scraper.APITweet var api_tweet APITweet
err = json.Unmarshal(data, &api_tweet) err = json.Unmarshal(data, &api_tweet)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
retweet, err := scraper.ParseSingleRetweet(api_tweet) retweet, err := ParseSingleRetweet(api_tweet)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
} assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
expected_id := 1404270043018448896 assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
if retweet.RetweetID != scraper.TweetID(1404270043018448896) { assert.Equal(UserID(44067298), retweet.RetweetedByID)
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetID) assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
}
expected_id = 1404269989646028804
if retweet.TweetID != scraper.TweetID(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.TweetID)
}
expected_id = 44067298
if retweet.RetweetedByID != scraper.UserID(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedByID)
}
expected_id = 1623639042
if retweet.RetweetedAt.Unix() != int64(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedAt.Unix())
}
} }

View File

@ -5,20 +5,23 @@ import (
"io/ioutil" "io/ioutil"
"testing" "testing"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func load_tweet_from_file(filename string) scraper.Tweet{ func load_tweet_from_file(filename string) Tweet{
data, err := ioutil.ReadFile(filename) data, err := ioutil.ReadFile(filename)
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apitweet scraper.APITweet var apitweet APITweet
err = json.Unmarshal(data, &apitweet) err = json.Unmarshal(data, &apitweet)
if err != nil { if err != nil {
panic(err) panic(err)
} }
tweet, err := scraper.ParseSingleTweet(apitweet) tweet, err := ParseSingleTweet(apitweet)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -27,300 +30,160 @@ func load_tweet_from_file(filename string) scraper.Tweet{
func TestParseSingleTweet(t *testing.T) { func TestParseSingleTweet(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
expected_text := "The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the largest white pill Ive swallowed in years." assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the largest white pill Ive swallowed in years.", tweet.Text)
actual_text := tweet.Text assert.Len(tweet.Mentions, 1)
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
if actual_text != expected_text { assert.Empty(tweet.Urls)
t.Errorf("Expected: %q; got %q", expected_text, actual_text) assert.Equal(int64(1621639105), tweet.PostedAt.Unix())
} assert.Zero(tweet.QuotedTweetID)
assert.Empty(tweet.Polls)
if len(tweet.Mentions) != 1 || tweet.Mentions[0] != "michaelmalice" {
t.Errorf("Expected %v, got %v", []string{"michaelmalice"}, tweet.Mentions)
}
if len(tweet.Urls) != 0 {
t.Errorf("Expected %d urls, but got %d", 0, len(tweet.Urls))
}
if tweet.PostedAt.Unix() != 1621639105 {
t.Errorf("Expected %d, got %d", 1621639105, tweet.PostedAt.Unix())
}
if tweet.QuotedTweetID != 0 {
t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
} }
func TestParseTweetWithImage(t *testing.T) { func TestParseTweetWithImage(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json")
expected_text := "this saddens me every time" assert.Equal("this saddens me every time", tweet.Text)
if tweet.Text != expected_text { assert.Len(tweet.Images, 1)
t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text)
}
if len(tweet.Images) != 1 {
t.Errorf("Expected 1 images but got %d", len(tweet.Images))
}
} }
func TestParseTweetWithQuotedTweetAsLink(t *testing.T) { func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json")
expected_text := "sometimes they're too dimwitted to even get the wrong title right" assert.Equal("sometimes they're too dimwitted to even get the wrong title right", tweet.Text)
if tweet.Text != expected_text { assert.Equal(TweetID(1395882872729477131), tweet.InReplyToID)
t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text) assert.Equal(TweetID(1396194494710788100), tweet.QuotedTweetID)
} assert.Empty(tweet.ReplyMentions)
assert.Empty(tweet.Polls)
expected_replied_id := scraper.TweetID(1395882872729477131)
if tweet.InReplyToID != expected_replied_id {
t.Errorf("Expected %q, got %q", expected_replied_id, tweet.InReplyToID)
}
if len(tweet.ReplyMentions) != 0 {
t.Errorf("Wanted %v, got %v", []string{}, tweet.ReplyMentions)
}
expected_quoted_id := scraper.TweetID(1396194494710788100)
if tweet.QuotedTweetID != expected_quoted_id {
t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
} }
func TestParseTweetWithVideo(t *testing.T) { func TestParseTweetWithVideo(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json")
assert.Empty(tweet.Images)
assert.Len(tweet.Videos, 1)
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12" v := tweet.Videos[0]
if len(tweet.Videos) != 1 || tweet.Videos[0].RemoteURL != expected_video { assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", v.RemoteURL)
t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos) assert.False(v.IsGif)
}
if tweet.Videos[0].IsGif != false {
t.Errorf("Expected it to be a regular video, but it was a gif")
}
if len(tweet.Images) != 0 {
t.Errorf("Should not have any images, but has %d", len(tweet.Images))
}
} }
func TestParseTweetWithGif(t *testing.T) { func TestParseTweetWithGif(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json")
assert.Len(tweet.Videos, 1)
expected_video := "https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4" v := tweet.Videos[0]
if len(tweet.Videos) != 1 { assert.Equal("https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4", v.RemoteURL)
t.Errorf("Expected 1 video (a gif), but got %d instead", len(tweet.Videos)) assert.True(v.IsGif)
}
if tweet.Videos[0].RemoteURL != expected_video {
t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos)
}
if tweet.Videos[0].IsGif != true {
t.Errorf("Expected video to be a gif, but it wasn't")
}
} }
func TestParseTweetWithUrl(t *testing.T) { func TestParseTweetWithUrl(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json")
assert.Empty(tweet.Polls)
assert.Len(tweet.Urls, 1)
if len(tweet.Urls) != 1 { u := tweet.Urls[0]
t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls)) assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text)
} assert.True(u.HasCard)
assert.Equal("reason.com", u.Domain)
expected_url_text := "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/"
if tweet.Urls[0].Text != expected_url_text {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
}
if !tweet.Urls[0].HasCard {
t.Errorf("Expected it to have a card, but it doesn't")
}
expected_url_domain := "reason.com"
if tweet.Urls[0].Domain != expected_url_domain {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
} }
func TestParseTweetWithUrlButNoCard(t *testing.T) { func TestParseTweetWithUrlButNoCard(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json")
assert.Len(tweet.Urls, 1)
if len(tweet.Urls) != 1 { u := tweet.Urls[0]
t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls)) assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text)
} assert.False(u.HasCard)
expected_url_text := "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364"
if tweet.Urls[0].Text != expected_url_text {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
}
if tweet.Urls[0].HasCard {
t.Errorf("Expected url not to have a card, but it thinks it has one")
}
} }
func TestParseTweetWithMultipleUrls(t *testing.T) { func TestParseTweetWithMultipleUrls(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json")
assert.Empty(tweet.Polls)
assert.Len(tweet.Urls, 3)
if len(tweet.Urls) != 3 { assert.False(tweet.Urls[0].HasCard)
t.Errorf("Expected %d urls, got %d instead", 3, len(tweet.Urls)) assert.False(tweet.Urls[1].HasCard)
} assert.True (tweet.Urls[2].HasCard)
if tweet.Urls[0].HasCard {
t.Errorf("Expected url not to have a card, but it does: %d", 0)
}
if tweet.Urls[1].HasCard {
t.Errorf("Expected url not to have a card, but it does: %d", 1)
}
if !tweet.Urls[2].HasCard {
t.Errorf("Expected url to have a card, but it doesn't: %d", 2)
}
expected_title := "Bidens victory came from the suburbs"
if tweet.Urls[2].Title != expected_title {
t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title)
}
if len(tweet.Polls) != 0 { assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title)
t.Errorf("Should not have any polls")
}
} }
func TestTweetWithLotsOfReplyMentions(t *testing.T) { func TestTweetWithLotsOfReplyMentions(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")
assert.Len(tweet.ReplyMentions, 4)
if len(tweet.ReplyMentions) != 4 { for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
t.Errorf("Expected %d reply-mentions, got %d", 4, len(tweet.ReplyMentions)) assert.Equal(v, tweet.ReplyMentions[i])
}
for i, v := range []scraper.UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
if tweet.ReplyMentions[i] != v {
t.Errorf("Expected %q, got %q at position %d", v, tweet.ReplyMentions[i], i)
}
} }
} }
func TestTweetWithPoll(t *testing.T) { func TestTweetWithPoll(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")
assert.Len(tweet.Polls, 1)
if len(tweet.Polls) != 1 {
t.Fatalf("Expected there to be 1 poll, but there was %d", len(tweet.Polls))
}
p := tweet.Polls[0] p := tweet.Polls[0]
assert.Equal(tweet.ID, p.TweetID)
if p.TweetID != tweet.ID { assert.Equal(4, p.NumChoices)
t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", p.TweetID, tweet.ID) assert.Equal("Tribal armband", p.Choice1)
} assert.Equal("Marijuana leaf", p.Choice2)
if p.NumChoices != 4 { assert.Equal("Butterfly", p.Choice3)
t.Errorf("Expected %d choices, got %d instead", 4, p.NumChoices) assert.Equal("Maple leaf", p.Choice4)
} assert.Equal(1593, p.Choice1_Votes)
expected_choice1 := "Tribal armband" assert.Equal(624, p.Choice2_Votes)
if p.Choice1 != expected_choice1 { assert.Equal(778, p.Choice3_Votes)
t.Errorf("Expected choice1 %q, got %q", expected_choice1, p.Choice1) assert.Equal(1138, p.Choice4_Votes)
} assert.Equal(1440 * 60, p.VotingDuration)
expected_choice2 := "Marijuana leaf" assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
if p.Choice2 != expected_choice2 { assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
t.Errorf("Expected choice2 %q, got %q", expected_choice2, p.Choice2)
}
expected_choice3 := "Butterfly"
if p.Choice3 != expected_choice3 {
t.Errorf("Expected choice3 %q, got %q", expected_choice3, p.Choice3)
}
expected_choice4 := "Maple leaf"
if p.Choice4 != expected_choice4 {
t.Errorf("Expected choice4 %q, got %q", expected_choice4, p.Choice4)
}
expected_votes1 := 1593
expected_votes2 := 624
expected_votes3 := 778
expected_votes4 := 1138
if p.Choice1_Votes != expected_votes1 {
t.Errorf("Expected Choice1_Votes %d, got %d", expected_votes1, p.Choice1_Votes)
}
if p.Choice2_Votes != expected_votes2 {
t.Errorf("Expected Choice2_Votes %d, got %d", expected_votes2, p.Choice2_Votes)
}
if p.Choice3_Votes != expected_votes3 {
t.Errorf("Expected Choice3_Votes %d, got %d", expected_votes3, p.Choice3_Votes)
}
if p.Choice4_Votes != expected_votes4 {
t.Errorf("Expected Choice4_Votes %d, got %d", expected_votes4, p.Choice4_Votes)
}
expected_duration := 1440 * 60
if p.VotingDuration != expected_duration {
t.Errorf("Expected voting duration %d seconds, got %d", expected_duration, p.VotingDuration)
}
expected_ends_at := int64(1638331934)
if p.VotingEndsAt.Unix() != expected_ends_at {
t.Errorf("Expected voting ends at %d (unix), got %d", expected_ends_at, p.VotingEndsAt.Unix())
}
expected_last_updated_at := int64(1638331935)
if p.LastUpdatedAt.Unix() != expected_last_updated_at {
t.Errorf("Expected updated %d, got %d", expected_last_updated_at, p.LastUpdatedAt.Unix())
}
} }
func TestParseTweetResponse(t *testing.T) { func TestParseTweetResponse(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json") data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp) tweets, retweets, users, err := ParseTweetResponse(tweet_resp)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
if len(tweets) != 29 - 3 { assert.Len(tweets, 29 - 3)
t.Errorf("Expected %d tweets, got %d", 29-3, len(tweets)) assert.Len(retweets, 3)
} assert.Len(users, 9)
if len(retweets) != 3 {
t.Errorf("Expected %d retweets, got %d", 3, len(retweets))
}
if len(users) != 9 {
t.Errorf("Expected %d users, got %d", 9, len(users))
}
} }
func TestParseTweetResponseWithTombstones(t *testing.T) { func TestParseTweetResponseWithTombstones(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json") data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var tweet_resp scraper.TweetResponse var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp) err = json.Unmarshal(data, &tweet_resp)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
extra_users := tweet_resp.HandleTombstones() extra_users := tweet_resp.HandleTombstones()
if len(extra_users) != 1 { assert.Len(extra_users, 1)
t.Errorf("Expected to need 1 extra user but got %d instead", len(extra_users))
}
tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp) tweets, retweets, users, err := ParseTweetResponse(tweet_resp)
if err != nil { require.NoError(t, err)
t.Fatal(err)
}
if len(tweets) != 2 { assert.Len(tweets, 2)
t.Errorf("Expected %d tweets, got %d", 2, len(tweets)) assert.Len(retweets, 0)
} assert.Len(users, 1)
if len(retweets) != 0 {
t.Errorf("Expected %d retweets, got %d", 0, len(retweets))
}
if len(users) != 1 {
t.Errorf("Expected %d users, got %d", 1, len(users))
}
} }

View File

@ -5,182 +5,92 @@ import (
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestParseAPIUrlCard(t *testing.T) { func TestParseAPIUrlCard(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card.json") data, err := ioutil.ReadFile("test_responses/tweet_content/url_card.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
expected_domain := "reason.com" url := ParseAPIUrlCard(apiCard)
if url.Domain != expected_domain { assert.Equal("reason.com", url.Domain)
t.Errorf("Expected %q, got %q", expected_domain, url.Domain) assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
} assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"", url.Description)
expected_title := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'" assert.Equal(600, url.ThumbnailWidth)
if url.Title != expected_title { assert.Equal(315, url.ThumbnailHeight)
t.Errorf("Expected %q, got %q", expected_title, url.Title) assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
} assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
expected_description := "\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"" assert.Equal(UserID(155581583), url.CreatorID)
if url.Description != expected_description { assert.Equal(UserID(16467567), url.SiteID)
t.Errorf("Expected %q, got %q", expected_description, url.Description) assert.True(url.HasThumbnail)
} assert.False(url.IsContentDownloaded)
expected_width := 600
if url.ThumbnailWidth != expected_width {
t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth)
}
expected_height := 315
if url.ThumbnailHeight != expected_height {
t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "odDi9EqO_600x600.jpg"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_creator_id := scraper.UserID(155581583)
if url.CreatorID != expected_creator_id {
t.Errorf("Expected %d, got %d", expected_creator_id, url.CreatorID)
}
expected_site_id := scraper.UserID(16467567)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
} }
func TestParseAPIUrlCardWithPlayer(t *testing.T) { func TestParseAPIUrlCardWithPlayer(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player.json") data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
expected_domain := "www.youtube.com" url := ParseAPIUrlCard(apiCard)
if url.Domain != expected_domain { assert.Equal("www.youtube.com", url.Domain)
t.Errorf("Expected %q, got %q", expected_domain, url.Domain) assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
} assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble...", url.Description)
expected_title := "The Politically Incorrect Guide to the Constitution (Starring Tom..." assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
if url.Title != expected_title { assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
t.Errorf("Expected %q, got %q", expected_title, url.Title) assert.Equal(UserID(10228272), url.SiteID)
} assert.True(url.HasThumbnail)
expected_description := "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..." assert.False(url.IsContentDownloaded)
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "_1t0btyt_800x320_1.jpg"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_site_id := scraper.UserID(10228272)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
} }
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) { func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
expected_domain := "www.youtube.com" url := ParseAPIUrlCard(apiCard)
if url.Domain != expected_domain { assert.Equal("www.youtube.com", url.Domain)
t.Errorf("Expected %q, got %q", expected_domain, url.Domain) assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
} assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________...", url.Description)
expected_title := "Did Michael Malice Turn Me into an Anarchist? | Ep 181" assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
if url.Title != expected_title { assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
t.Errorf("Expected %q, got %q", expected_title, url.Title) assert.Equal(UserID(10228272), url.SiteID)
} assert.True(url.HasThumbnail)
expected_description := "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..." assert.False(url.IsContentDownloaded)
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_remote_url := "https://pbs.twimg.com/cards/player-placeholder.png"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "player-placeholder.png"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_site_id := scraper.UserID(10228272)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
} }
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) { func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard scraper.APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
expected_domain := "en.m.wikipedia.org" url := ParseAPIUrlCard(apiCard)
if url.Domain != expected_domain { assert.Equal("en.m.wikipedia.org", url.Domain)
t.Errorf("Expected %q, got %q", expected_domain, url.Domain) assert.Equal("Entryism - Wikipedia", url.Title)
} assert.Equal("", url.Description)
expected_title := "Entryism - Wikipedia" assert.True(url.HasCard)
if url.Title != expected_title { assert.False(url.HasThumbnail)
t.Errorf("Expected %q, got %q", expected_title, url.Title)
}
expected_description := ""
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
if !url.HasCard {
t.Errorf("Expected it to have a card, but it didn't")
}
if url.HasThumbnail {
t.Errorf("Should have no thumbnail, but it does")
}
} }

View File

@ -7,162 +7,96 @@ import (
"net/http" "net/http"
"github.com/jarcoal/httpmock" "github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestParseSingleUser(t *testing.T) { func TestParseSingleUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json") data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var user_resp scraper.UserResponse var user_resp UserResponse
err = json.Unmarshal(data, &user_resp) err = json.Unmarshal(data, &user_resp)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
apiUser := user_resp.ConvertToAPIUser() apiUser := user_resp.ConvertToAPIUser()
user, err := scraper.ParseSingleUser(apiUser) user, err := ParseSingleUser(apiUser)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error())
}
expected_id := 44067298 assert.Equal(UserID(44067298), user.ID)
if user.ID != scraper.UserID(expected_id) { assert.Equal("Michael Malice", user.DisplayName)
t.Errorf("Expected %q, got %q", expected_id, user.ID) assert.Equal(UserHandle("michaelmalice"), user.Handle)
} assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
if user.DisplayName != "Michael Malice" { assert.Equal(941, user.FollowingCount)
t.Errorf("Expected %q, got %q", "Michael Malice", user.DisplayName) assert.Equal(208589, user.FollowersCount)
} assert.Equal("Brooklyn", user.Location)
if user.Handle != "michaelmalice" { assert.Equal("https://amzn.to/3oInafv", user.Website)
t.Errorf("Expected %q, got %q", "michaelmalice", user.Handle) assert.Equal(int64(1243920952), user.JoinDate.Unix())
} assert.False(user.IsPrivate)
expectedBio := "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑" assert.True (user.IsVerified)
if user.Bio != expectedBio { assert.False(user.IsBanned)
t.Errorf("Expected %q, got %q", expectedBio, user.Bio) assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
} assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
if user.FollowingCount != 941 { assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1615134676", user.BannerImageUrl)
t.Errorf("Expected %d, got %d", 941, user.FollowingCount) assert.Equal("michaelmalice_profile_Lbwdb_C9.jpg", user.ProfileImageLocalPath)
} assert.Equal("michaelmalice_banner_1615134676.jpg", user.BannerImageLocalPath)
if user.FollowersCount != 208589 { assert.Equal(TweetID(1403835414373339136), user.PinnedTweetID)
t.Errorf("Expected %d, got %d", 941, user.FollowersCount)
}
if user.Location != "Brooklyn" {
t.Errorf("Expected %q, got %q", "Brooklyn", user.Location)
}
if user.Website != "https://amzn.to/3oInafv" {
t.Errorf("Expected %q, got %q", "https://amzn.to/3oInafv", user.Website)
}
if user.JoinDate.Unix() != 1243920952 {
t.Errorf("Expected %d, got %d", 1243920952, user.JoinDate.Unix())
}
if user.IsPrivate != false {
t.Errorf("Expected %v, got %v", false, user.IsPrivate)
}
if user.IsVerified != true {
t.Errorf("Expected %v, got %v", true, user.IsPrivate)
}
if user.IsBanned != false {
t.Errorf("User should not be banned")
}
expectedProfileImage := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg"
if user.ProfileImageUrl != expectedProfileImage {
t.Errorf("Expected %q, got %q", expectedProfileImage, user.ProfileImageUrl)
}
expected_tiny_profile_image := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg"
if user.GetTinyProfileImageUrl() != expected_tiny_profile_image {
t.Errorf("Expected %q, got %q", expected_tiny_profile_image, user.GetTinyProfileImageUrl())
}
expectedBannerImage := "https://pbs.twimg.com/profile_banners/44067298/1615134676"
if user.BannerImageUrl != expectedBannerImage {
t.Errorf("Expected %q, got %q", expectedBannerImage, user.BannerImageUrl)
}
expected_profile_image_local := "michaelmalice_profile_Lbwdb_C9.jpg"
if user.ProfileImageLocalPath != expected_profile_image_local {
t.Errorf("Expected %q, got %q", expected_profile_image_local, user.ProfileImageLocalPath)
}
expected_banner_image_local := "michaelmalice_banner_1615134676.jpg"
if user.BannerImageLocalPath != expected_banner_image_local {
t.Errorf("Expected %q, got %q", expected_banner_image_local, user.BannerImageLocalPath)
}
expected_id = 1403835414373339136
if user.PinnedTweetID != scraper.TweetID(expected_id) {
t.Errorf("Expected %q, got %q", expected_id, user.PinnedTweet)
}
} }
/** /**
* Should correctly parse a banned user * Should correctly parse a banned user
*/ */
func TestParseBannedUser(t *testing.T) { func TestParseBannedUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/suspended_user.json") data, err := ioutil.ReadFile("test_responses/suspended_user.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var user_resp scraper.UserResponse var user_resp UserResponse
err = json.Unmarshal(data, &user_resp) err = json.Unmarshal(data, &user_resp)
if err != nil { require.NoError(t, err)
t.Fatalf(err.Error())
}
apiUser := user_resp.ConvertToAPIUser() apiUser := user_resp.ConvertToAPIUser()
user, err := scraper.ParseSingleUser(apiUser) user, err := ParseSingleUser(apiUser)
if err != nil { require.NoError(t, err)
t.Fatalf(err.Error()) assert.Equal(UserID(193918550), user.ID)
} assert.True(user.IsBanned)
if user.ID != 193918550 {
t.Errorf("Expected id %d, got %d", 193918550, user.ID)
}
if user.IsBanned != true {
t.Errorf("Expected user to be banned")
}
// Test generation of profile images for banned user // Test generation of profile images for banned user
if user.GetTinyProfileImageUrl() != "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" { assert.Equal("https://abs.twimg.com/sticky/default_profile_images/default_profile.png", user.GetTinyProfileImageUrl())
t.Errorf("Incorrect tiny profile image URL for banned user: %q", user.GetTinyProfileImageUrl()) assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
}
if user.GetTinyProfileImageLocalPath() != "default_profile.png" {
t.Errorf("Incorrect tiny profile image local path for banned user: %q", user.GetTinyProfileImageLocalPath())
}
} }
/** /**
* Should extract a user handle from a tweet URL, or fail if URL is invalid * Should extract a user handle from a tweet URL, or fail if URL is invalid
*/ */
func TestParseHandleFromTweetUrl(t *testing.T) { func TestParseHandleFromTweetUrl(t *testing.T) {
assert := assert.New(t)
// Test valid tweet url // Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730" url := "https://twitter.com/kanesays23/status/1429583672827465730"
expected_user := scraper.UserHandle("kanesays23") result, err := ParseHandleFromTweetUrl(url)
result, err := scraper.ParseHandleFromTweetUrl(url) assert.NoError(err)
if err != nil { assert.Equal(UserHandle("kanesays23"), result)
t.Errorf("Unexpected error while parsing handle: %s", err)
}
if result != expected_user {
t.Errorf("Expected handle %q, got %q", expected_user, result)
}
// Test url with GET params // Test url with GET params
result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
expected_user = scraper.UserHandle("NerdNoticing") assert.NoError(err)
if err != nil { assert.Equal(UserHandle("NerdNoticing"), result)
t.Errorf("Unexpected error while parsing handle: %s", err)
}
if result != expected_user {
t.Errorf("Expected handle %q, got %q", expected_user, result)
}
// Test invalid url // Test invalid url
_, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") _, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
if err == nil { assert.Error(err)
t.Errorf("Should have produced an error for invalid URL")
}
// Test empty string // Test empty string
_, err = scraper.ParseHandleFromTweetUrl("") _, err = ParseHandleFromTweetUrl("")
if err == nil { assert.Error(err)
t.Errorf("Should have produced an error for invalid URL")
}
} }
@ -170,6 +104,8 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
* Should extract a user handle from a shortened tweet URL * Should extract a user handle from a shortened tweet URL
*/ */
func TestParseHandleFromShortenedTweetUrl(t *testing.T) { func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
assert := assert.New(t)
short_url := "https://t.co/rZVrNGJyDe" short_url := "https://t.co/rZVrNGJyDe"
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529" expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
@ -183,16 +119,9 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
}) })
// Check the httmock interceptor is working correctly // Check the httmock interceptor is working correctly
if scraper.ExpandShortUrl(short_url) != expanded_url { require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request")
t.Fatalf("httpmock didn't intercept the request")
}
result, err := scraper.ParseHandleFromTweetUrl(short_url) result, err := ParseHandleFromTweetUrl(short_url)
if err != nil { require.NoError(t, err)
t.Errorf(err.Error()) assert.Equal(UserHandle("MarkSnyderJr1"), result)
}
expected_user := scraper.UserHandle("MarkSnyderJr1")
if result != expected_user {
t.Errorf("Expected user %q, got %q", expected_user, result)
}
} }

View File

@ -5,65 +5,33 @@ import (
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"offline_twitter/scraper" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
) )
func TestParseAPIVideo(t *testing.T) { func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/video.json") data, err := ioutil.ReadFile("test_responses/tweet_content/video.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apivideo scraper.APIExtendedMedia var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo) err = json.Unmarshal(data, &apivideo)
if err != nil { require.NoError(t, err)
t.Fatal(err.Error())
}
tweet_id := scraper.TweetID(28)
video := scraper.ParseAPIVideo(apivideo, tweet_id)
expected_id := 1418951950020845568 tweet_id := TweetID(28)
if video.ID != scraper.VideoID(expected_id) { video := ParseAPIVideo(apivideo, tweet_id)
t.Errorf("Expected ID of %d, got %d", expected_id, video.ID) assert.Equal(VideoID(1418951950020845568), video.ID)
} assert.Equal(tweet_id, video.TweetID)
if video.TweetID != tweet_id { assert.Equal(1280, video.Height)
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID) assert.Equal(720, video.Width)
} assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("28.mp4", video.LocalFilename)
expected_height := 1280 assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
if video.Height != expected_height { assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
t.Errorf("Expected height %d, got %d", expected_height, video.Height) assert.Equal(275952, video.ViewCount)
} assert.Equal(88300, video.Duration)
expected_width := 720 assert.False(video.IsDownloaded)
if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
}
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
}
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
}
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailLocalPath != expected_thumbnail_filename {
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
}
expected_view_count := 275952
if video.ViewCount != expected_view_count {
t.Errorf("Expected view count %d, got %d", expected_view_count, video.ViewCount)
}
expected_duration := 88300
if video.Duration != expected_duration {
t.Errorf("Expected duration %d, got %d", expected_duration, video.Duration)
}
if video.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
} }