Refactor scraper tests

- use the stretchr/testify "assert" and "require" packages for assertions
- use "dot imports" to remove "scraper.XYZ" from everything
This commit is contained in:
Alessio 2022-01-31 19:14:14 -08:00
parent babbdecf6c
commit 069ab83fba
11 changed files with 362 additions and 814 deletions

View File

@ -5,17 +5,21 @@ import (
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
func TestNormalizeContent(t *testing.T) {
assert := assert.New(t)
test_cases := []struct {
filename string
eventual_full_text string
quoted_status_id scraper.TweetID
in_reply_to_id scraper.TweetID
retweeted_status_id scraper.TweetID
quoted_status_id TweetID
in_reply_to_id TweetID
retweeted_status_id TweetID
reply_mentions string
} {
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
@ -35,77 +39,53 @@ func TestNormalizeContent(t *testing.T) {
if err != nil {
panic(err)
}
var tweet scraper.APITweet
var tweet APITweet
err = json.Unmarshal(data, &tweet)
if err != nil {
println("Failed at " + v.filename)
t.Errorf(err.Error())
}
assert.NoError(err, "Failed at " + v.filename)
tweet.NormalizeContent()
if tweet.FullText != v.eventual_full_text {
t.Errorf("Expected %q, got %q", v.eventual_full_text, tweet.FullText)
}
if scraper.TweetID(tweet.QuotedStatusID) != v.quoted_status_id {
t.Errorf("Expected quoted status %d, but got %d", v.quoted_status_id, tweet.QuotedStatusID)
}
if scraper.TweetID(tweet.InReplyToStatusID) != v.in_reply_to_id {
t.Errorf("Expected in_reply_to_id id %d, but got %d", v.in_reply_to_id, tweet.InReplyToStatusID)
}
if scraper.TweetID(tweet.RetweetedStatusID) != v.retweeted_status_id {
t.Errorf("Expected retweeted status id %d, but got %d", v.retweeted_status_id, tweet.RetweetedStatusID)
}
if tweet.Entities.ReplyMentions != v.reply_mentions {
t.Errorf("Expected @reply mentions to be %q, but it was %q", v.reply_mentions, tweet.Entities.ReplyMentions)
}
assert.Equal(v.eventual_full_text, tweet.FullText, "Tweet text")
assert.Equal(int64(v.quoted_status_id), tweet.QuotedStatusID, "Quoted status ID")
assert.Equal(int64(v.in_reply_to_id), tweet.InReplyToStatusID, "In reply to ID")
assert.Equal(int64(v.retweeted_status_id), tweet.RetweetedStatusID, "Retweeted status ID")
assert.Equal(v.reply_mentions, tweet.Entities.ReplyMentions, "Reply mentions")
}
}
// TestUserProfileToAPIUser reads a saved user-profile response from disk, unmarshals
// it into a UserResponse and checks the ID and follower count of the value returned
// by ConvertToAPIUser.
// NOTE(review): this span shows the removed `if ... t.Errorf` checks side by side
// with the `assert.*` calls that replace them.
func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil {
// An unreadable fixture panics instead of failing only this test.
panic(err)
}
var user_resp scraper.UserResponse
var user_resp UserResponse
err = json.Unmarshal(data, &user_resp)
if err != nil {
t.Errorf(err.Error())
}
assert.NoError(err)
result := user_resp.ConvertToAPIUser()
if result.ID != 44067298 {
t.Errorf("Expected ID %q, got %q", 44067298, result.ID)
}
if result.FollowersCount != user_resp.Data.User.Legacy.FollowersCount {
t.Errorf("Expected user count %d, got %d", user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
}
assert.Equal(int64(44067298), result.ID)
// The expected follower count is read from the same parsed response, so this
// checks that the field is carried over rather than pinning a literal value.
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
}
// TestGetCursor unmarshals a saved TweetResponse and checks that GetCursor returns
// the expected cursor string for that fixture.
// NOTE(review): the removed `expected_cursor`/`actual_cursor` comparison and the
// single `assert.Equal` that replaces it both appear in this span.
func TestGetCursor(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Errorf(err.Error())
}
assert.NoError(err)
expected_cursor := "LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA="
actual_cursor := tweet_resp.GetCursor()
if expected_cursor != actual_cursor {
t.Errorf("Expected %q, got %q", expected_cursor, actual_cursor)
}
assert.Equal("LBmGhsC+ibH1peAmgICjpbS0m98mgICj7a2lmd8mhsC4rbmsmN8mgMCqkbT1p+AmgsC4ucv4o+AmhoCyrf+nlt8mhMC9qfOwlt8mJQISAAA=", tweet_resp.GetCursor())
}
func TestIsEndOfFeed(t *testing.T) {
assert := assert.New(t)
test_cases := []struct {
filename string
is_end_of_feed bool
@ -118,130 +98,86 @@ func TestIsEndOfFeed(t *testing.T) {
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Fatalf(err.Error())
}
result := tweet_resp.IsEndOfFeed()
if v.is_end_of_feed != result {
t.Errorf("Expected IsEndOfFeed to be %v, but got %v", v.is_end_of_feed, result)
}
assert.NoError(err)
assert.Equal(v.is_end_of_feed, tweet_resp.IsEndOfFeed())
}
}
// TestHandleTombstonesHidden loads a response fixture that starts with 2 tweets,
// calls HandleTombstones, and checks that the tweet map then holds 4 entries,
// including two entries with TombstoneText "hidden" under the expected IDs.
// NOTE(review): removed `t.Errorf`/`t.Fatalf` checks and their `assert`/`require`
// replacements are interleaved in this span.
func TestHandleTombstonesHidden(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Fatalf(err.Error())
}
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
// NOTE(review): the precondition count was fatal (t.Fatalf) before the refactor;
// assert.Equal only records the failure and continues.
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 4 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets))
}
assert.Equal(4, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
// The tweet map is keyed by the tweet ID rendered as a string.
first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"]
if !ok {
t.Errorf("Missing tombstoned tweet for %s", "1454522147750260742")
}
if first_tombstone.ID != 1454522147750260742 {
t.Errorf("Expected ID %d, got %d instead", 1454522147750260742, first_tombstone.ID)
}
if first_tombstone.UserID != 1365863538393309184 {
t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, first_tombstone.UserID)
}
if first_tombstone.TombstoneText != "hidden" {
t.Errorf("Wrong tombstone text: %s", first_tombstone.TombstoneText)
// assert.True's boolean result gates the field checks, so they only run when
// the map entry was actually found.
if assert.True(ok, "Missing tombstone") {
assert.Equal(int64(1454522147750260742), first_tombstone.ID)
assert.Equal(int64(1365863538393309184), first_tombstone.UserID)
assert.Equal("hidden", first_tombstone.TombstoneText)
}
second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"]
if !ok {
t.Errorf("Missing tombstoned tweet for %s", "1454515503242829830")
}
if second_tombstone.ID != 1454515503242829830 {
t.Errorf("Expected ID %d, got %d instead", 1454515503242829830, second_tombstone.ID)
}
if second_tombstone.UserID != 1365863538393309184 {
t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, second_tombstone.UserID)
}
if second_tombstone.TombstoneText != "hidden" {
t.Errorf("Wrong tombstone text: %s", second_tombstone.TombstoneText)
if assert.True(ok, "Missing tombstone") {
assert.Equal(int64(1454515503242829830), second_tombstone.ID)
assert.Equal(int64(1365863538393309184), second_tombstone.UserID)
assert.Equal("hidden", second_tombstone.TombstoneText)
}
}
// TestHandleTombstonesDeleted loads a response fixture that starts with 1 tweet,
// calls HandleTombstones, and checks that the tweet map then holds 2 entries,
// one of them with TombstoneText "deleted" under the expected ID.
// NOTE(review): removed `t.Errorf`/`t.Fatalf` checks and their `assert`/`require`
// replacements are interleaved in this span.
func TestHandleTombstonesDeleted(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Fatalf(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
assert.Equal(1, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
if len(tweet_resp.GlobalObjects.Tweets) != 1 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets))
}
tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
}
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
// The tweet map is keyed by the tweet ID rendered as a string.
tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"]
if !ok {
t.Errorf("Missing tombstoned tweet for %s", "1454521654781136902")
}
if tombstone.ID != 1454521654781136902 {
t.Errorf("Expected ID %d, got %d instead", 1454521654781136902, tombstone.ID)
}
if tombstone.UserID != 1218687933391298560 {
t.Errorf("Expected UserID %d, got %d instead", 1218687933391298560, tombstone.UserID)
}
if tombstone.TombstoneText != "deleted" {
t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
// Field checks only run when the map lookup succeeded.
if assert.True(ok) {
assert.Equal(int64(1454521654781136902), tombstone.ID)
assert.Equal(int64(1218687933391298560), tombstone.UserID)
assert.Equal("deleted", tombstone.TombstoneText)
}
}
// TestHandleTombstonesUnavailable loads a response fixture that starts with 2
// tweets, calls HandleTombstones, and checks that the tweet map then holds 3
// entries, one of them with TombstoneText "unavailable" under the expected ID.
// NOTE(review): removed `t.Errorf`/`t.Fatalf` checks and their `assert`/`require`
// replacements are interleaved in this span.
func TestHandleTombstonesUnavailable(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_unavailable.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Fatalf(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
assert.Equal(2, len(tweet_resp.GlobalObjects.Tweets), "Before tombstone handling")
if len(tweet_resp.GlobalObjects.Tweets) != 2 {
t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
}
tweet_resp.HandleTombstones()
if len(tweet_resp.GlobalObjects.Tweets) != 3 {
t.Errorf("Should have ended up with %d tweets, but had %d instead", 3, len(tweet_resp.GlobalObjects.Tweets))
}
assert.Equal(3, len(tweet_resp.GlobalObjects.Tweets), "After tombstone handling")
// The tweet map is keyed by the tweet ID rendered as a string.
tombstone, ok := tweet_resp.GlobalObjects.Tweets["1452686887651532809"]
if !ok {
t.Errorf("Missing tombstoned tweet for %s", "1452686887651532809")
}
if tombstone.ID != 1452686887651532809 {
t.Errorf("Expected ID %d, got %d instead", 1452686887651532809, tombstone.ID)
}
if tombstone.UserID != 1241389617502445569 {
t.Errorf("Expected UserID %d, got %d instead", 1241389617502445569, tombstone.UserID)
}
if tombstone.TombstoneText != "unavailable" {
t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
// Field checks only run when the map lookup succeeded.
if assert.True(ok) {
assert.Equal(int64(1452686887651532809), tombstone.ID)
assert.Equal(int64(1241389617502445569), tombstone.UserID)
assert.Equal("unavailable", tombstone.TombstoneText)
}
}

View File

@ -6,10 +6,10 @@ import (
"encoding/json"
"fmt"
. "offline_twitter/scraper"
"github.com/stretchr/testify/assert"
)
. "offline_twitter/scraper"
)
/**
* Parse an APIV2User

View File

@ -1,12 +1,14 @@
package scraper_test
import "testing"
import "fmt"
import "offline_twitter/scraper"
import (
"testing"
"fmt"
. "offline_twitter/scraper"
)
// Makes an HTTP request
func TestGetGuestToken(t *testing.T) {
token, err := scraper.GetGuestToken()
token, err := GetGuestToken()
if err != nil {
t.Errorf("%v", err)
}
@ -22,6 +24,6 @@ func TestGetGuestToken(t *testing.T) {
// other than the first use the cache.
func BenchmarkGetGuestToken(b *testing.B) {
// Calls GetGuestToken b.N times; both the token and the error are discarded.
// NOTE(review): the package-qualified call is the removed line, the bare call
// (via the dot import) is its replacement.
for i := 0; i < b.N; i++ {
_, _ = scraper.GetGuestToken()
_, _ = GetGuestToken()
}
}

View File

@ -5,42 +5,27 @@ import (
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
// TestParseAPIMedia unmarshals a saved APIMedia fixture, passes it to
// ParseAPIMedia and checks the resulting image's ID, remote URL, dimensions,
// local filename and IsDownloaded flag.
// NOTE(review): the removed `expected_*` / `t.Errorf` checks and the `assert.*`
// calls that replace them are interleaved in this span.
func TestParseAPIMedia(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/image.json")
if err != nil {
panic(err)
}
var apimedia scraper.APIMedia
var apimedia APIMedia
err = json.Unmarshal(data, &apimedia)
if err != nil {
t.Fatal(err.Error())
}
image := scraper.ParseAPIMedia(apimedia)
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
expected_id := 1395882862289772553
if image.ID != scraper.ImageID(expected_id) {
t.Errorf("Expected ID of %q, got %q", expected_id, image.ID)
}
expected_remote_url := "https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg"
if image.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, image.RemoteURL)
}
expected_width := 593
if image.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, image.Width)
}
expected_height := 239
if image.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, image.Height)
}
expected_local_filename := "E18sEUrWYAk8dBl.jpg"
if image.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, image.LocalFilename)
}
if image.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
image := ParseAPIMedia(apimedia)
// The expected ID is wrapped in ImageID so assert.Equal compares like types.
assert.Equal(ImageID(1395882862289772553), image.ID)
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
assert.Equal(593, image.Width)
assert.Equal(239, image.Height)
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
assert.False(image.IsDownloaded)
}

View File

@ -6,7 +6,9 @@ import (
"net/http"
"net/http/httptest"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
. "offline_twitter/scraper"
)
@ -18,8 +20,5 @@ func TestExpandShortUrl(t *testing.T) {
}))
defer srvr.Close()
result := scraper.ExpandShortUrl(srvr.URL)
if result != redirecting_to {
t.Errorf("Expected %q, got %q", redirecting_to, result)
}
assert.Equal(t, redirecting_to, ExpandShortUrl(srvr.URL))
}

View File

@ -5,96 +5,63 @@ import (
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
// TestParsePoll2Choices unmarshals a saved two-option poll card, passes it to
// ParseAPIPoll and checks the poll's ID, choice count, duration, timestamps,
// choice labels and vote counts.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParsePoll2Choices(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
if err != nil {
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
poll := scraper.ParseAPIPoll(apiCard)
if poll.ID != 1457419248461131776 {
t.Errorf("Expected ID %d, got %d", 1457419248461131776, poll.ID)
}
if poll.NumChoices != 2 {
t.Errorf("Expected %d choices, got %d", 2, poll.NumChoices)
}
if poll.VotingDuration != 60 * 60 * 24 {
t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration)
}
expected_ending := int64(1636397201)
if poll.VotingEndsAt.Unix() != expected_ending {
t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
}
expected_last_updated := int64(1636318755)
if poll.LastUpdatedAt.Unix() != expected_last_updated {
t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix())
}
if expected_last_updated > expected_ending {
t.Errorf("Last updated should be before poll closes!")
}
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1457419248461131776), poll.ID)
assert.Equal(2, poll.NumChoices)
// 60 * 60 * 24: one day expressed in seconds.
assert.Equal(60 * 60 * 24, poll.VotingDuration)
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
if poll.Choice1 != "Yes" || poll.Choice2 != "No" {
t.Errorf("Expected %q and %q, got %q and %q", "Yes", "No", poll.Choice1, poll.Choice2)
}
if poll.Choice1_Votes != 529 {
t.Errorf("Expected %d votes for choice 1, got %d", 529, poll.Choice1_Votes)
}
if poll.Choice2_Votes != 2182 {
t.Errorf("Expected %d votes for choice 2, got %d", 2182, poll.Choice2_Votes)
}
// In this fixture the last update precedes the end of voting.
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Yes", poll.Choice1)
assert.Equal("No", poll.Choice2)
assert.Equal(529, poll.Choice1_Votes)
assert.Equal(2182, poll.Choice2_Votes)
}
// TestParsePoll4Choices unmarshals a saved four-option poll card (fixture name
// ends in "_ended"), passes it to ParseAPIPoll and checks the poll's ID, choice
// count, duration, timestamps, choice labels and vote counts.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParsePoll4Choices(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
if err != nil {
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
poll := scraper.ParseAPIPoll(apiCard)
if poll.ID != 1455611588854140929 {
t.Errorf("Expected ID %d, got %d", 1455611588854140929, poll.ID)
}
if poll.NumChoices != 4 {
t.Errorf("Expected %d choices, got %d", 4, poll.NumChoices)
}
if poll.VotingDuration != 60 * 60 * 24 {
t.Errorf("Expected duratino %d, got %d", 60 * 60 * 24, poll.VotingDuration)
}
expected_ending := int64(1635966221)
if poll.VotingEndsAt.Unix() != expected_ending {
t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
}
expected_last_updated := int64(1635966226)
if poll.LastUpdatedAt.Unix() != expected_last_updated {
t.Errorf("Expected last-updated time %d, got %d", expected_last_updated, poll.LastUpdatedAt.Unix())
}
if expected_last_updated < expected_ending {
t.Errorf("Last updated should be after poll closes!")
}
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1455611588854140929), poll.ID)
assert.Equal(4, poll.NumChoices)
// 60 * 60 * 24: one day expressed in seconds.
assert.Equal(60 * 60 * 24, poll.VotingDuration)
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
// In this fixture the last update comes after the end of voting.
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
if poll.Choice1 != "Alec Baldwin" || poll.Choice1_Votes != 1669 {
t.Errorf("Expected %q with %d, got %q with %d", "Alec Baldwin", 1669, poll.Choice1, poll.Choice1_Votes)
}
if poll.Choice2 != "Andew Cuomo" || poll.Choice2_Votes != 272 {
t.Errorf("Expected %q with %d, got %q with %d", "Andew Cuomo", 272, poll.Choice2, poll.Choice2_Votes)
}
if poll.Choice3 != "George Floyd" || poll.Choice3_Votes != 829 {
t.Errorf("Expected %q with %d, got %q with %d", "George Floyd", 829, poll.Choice3, poll.Choice3_Votes)
}
if poll.Choice4 != "Derek Chauvin" || poll.Choice4_Votes != 2397 {
t.Errorf("Expected %q with %d, got %q with %d", "Derek Chauvin", 2397, poll.Choice4, poll.Choice4_Votes)
}
// The combined label-and-votes checks above are split into one assertion per
// field below.
assert.Equal("Alec Baldwin", poll.Choice1)
assert.Equal(1669, poll.Choice1_Votes)
assert.Equal("Andew Cuomo", poll.Choice2)
assert.Equal(272, poll.Choice2_Votes)
assert.Equal("George Floyd", poll.Choice3)
assert.Equal(829, poll.Choice3_Votes)
assert.Equal("Derek Chauvin", poll.Choice4)
assert.Equal(2397, poll.Choice4_Votes)
}

View File

@ -5,38 +5,27 @@ import (
"io/ioutil"
"testing"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
// TestParseSingleRetweet unmarshals a saved APITweet fixture, passes it to
// ParseSingleRetweet and checks the retweet ID, the retweeted tweet's ID, the
// retweeting user's ID and the retweet timestamp.
// NOTE(review): the removed `expected_id` / `t.Errorf` checks and the `assert.*`
// calls that replace them are interleaved in this span.
func TestParseSingleRetweet(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_that_is_a_retweet.json")
if err != nil {
panic(err)
}
var api_tweet scraper.APITweet
var api_tweet APITweet
err = json.Unmarshal(data, &api_tweet)
if err != nil {
t.Errorf(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
retweet, err := scraper.ParseSingleRetweet(api_tweet)
if err != nil {
t.Errorf(err.Error())
}
expected_id := 1404270043018448896
if retweet.RetweetID != scraper.TweetID(1404270043018448896) {
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetID)
}
expected_id = 1404269989646028804
if retweet.TweetID != scraper.TweetID(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.TweetID)
}
expected_id = 44067298
if retweet.RetweetedByID != scraper.UserID(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedByID)
}
expected_id = 1623639042
if retweet.RetweetedAt.Unix() != int64(expected_id) {
t.Errorf("Expected %d, got %d", expected_id, retweet.RetweetedAt.Unix())
}
retweet, err := ParseSingleRetweet(api_tweet)
require.NoError(t, err)
// Expected values are wrapped in the matching ID type so assert.Equal compares
// like types.
assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
assert.Equal(UserID(44067298), retweet.RetweetedByID)
assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
}

View File

@ -5,20 +5,23 @@ import (
"io/ioutil"
"testing"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
func load_tweet_from_file(filename string) scraper.Tweet{
func load_tweet_from_file(filename string) Tweet{
data, err := ioutil.ReadFile(filename)
if err != nil {
panic(err)
}
var apitweet scraper.APITweet
var apitweet APITweet
err = json.Unmarshal(data, &apitweet)
if err != nil {
panic(err)
}
tweet, err := scraper.ParseSingleTweet(apitweet)
tweet, err := ParseSingleTweet(apitweet)
if err != nil {
panic(err)
}
@ -27,300 +30,160 @@ func load_tweet_from_file(filename string) scraper.Tweet{
// TestParseSingleTweet loads a tweet fixture through load_tweet_from_file and
// checks its text, mentions, URLs, posting time, quoted-tweet ID and polls.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseSingleTweet(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
expected_text := "The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the largest white pill Ive swallowed in years."
actual_text := tweet.Text
if actual_text != expected_text {
t.Errorf("Expected: %q; got %q", expected_text, actual_text)
}
if len(tweet.Mentions) != 1 || tweet.Mentions[0] != "michaelmalice" {
t.Errorf("Expected %v, got %v", []string{"michaelmalice"}, tweet.Mentions)
}
if len(tweet.Urls) != 0 {
t.Errorf("Expected %d urls, but got %d", 0, len(tweet.Urls))
}
if tweet.PostedAt.Unix() != 1621639105 {
t.Errorf("Expected %d, got %d", 1621639105, tweet.PostedAt.Unix())
}
if tweet.QuotedTweetID != 0 {
t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the largest white pill Ive swallowed in years.", tweet.Text)
// Len + Contains together replace the old "exactly one mention, and it is
// michaelmalice" check without indexing into the slice.
assert.Len(tweet.Mentions, 1)
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
assert.Empty(tweet.Urls)
assert.Equal(int64(1621639105), tweet.PostedAt.Unix())
assert.Zero(tweet.QuotedTweetID)
assert.Empty(tweet.Polls)
}
// TestParseTweetWithImage loads a tweet fixture through load_tweet_from_file and
// checks its text and that it carries exactly one image.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithImage(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json")
expected_text := "this saddens me every time"
if tweet.Text != expected_text {
t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text)
}
if len(tweet.Images) != 1 {
t.Errorf("Expected 1 images but got %d", len(tweet.Images))
}
assert.Equal("this saddens me every time", tweet.Text)
assert.Len(tweet.Images, 1)
}
// TestParseTweetWithQuotedTweetAsLink loads a tweet fixture through
// load_tweet_from_file and checks its text, the ID it replies to, the ID it
// quotes, and that it has no reply-mentions and no polls.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json")
expected_text := "sometimes they're too dimwitted to even get the wrong title right"
if tweet.Text != expected_text {
t.Errorf("Expected: %q, got: %q", expected_text, tweet.Text)
}
expected_replied_id := scraper.TweetID(1395882872729477131)
if tweet.InReplyToID != expected_replied_id {
t.Errorf("Expected %q, got %q", expected_replied_id, tweet.InReplyToID)
}
if len(tweet.ReplyMentions) != 0 {
t.Errorf("Wanted %v, got %v", []string{}, tweet.ReplyMentions)
}
expected_quoted_id := scraper.TweetID(1396194494710788100)
if tweet.QuotedTweetID != expected_quoted_id {
t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
assert.Equal("sometimes they're too dimwitted to even get the wrong title right", tweet.Text)
assert.Equal(TweetID(1395882872729477131), tweet.InReplyToID)
assert.Equal(TweetID(1396194494710788100), tweet.QuotedTweetID)
assert.Empty(tweet.ReplyMentions)
assert.Empty(tweet.Polls)
}
// TestParseTweetWithVideo loads a tweet fixture through load_tweet_from_file and
// checks that it has no images and exactly one video with the expected remote
// URL that is not flagged as a gif.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithVideo(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json")
assert.Empty(tweet.Images)
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// `tweet.Videos[0]` below panics if the slice is empty; require.Len would stop
// the test here instead.
assert.Len(tweet.Videos, 1)
expected_video := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if len(tweet.Videos) != 1 || tweet.Videos[0].RemoteURL != expected_video {
t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos)
}
if tweet.Videos[0].IsGif != false {
t.Errorf("Expected it to be a regular video, but it was a gif")
}
if len(tweet.Images) != 0 {
t.Errorf("Should not have any images, but has %d", len(tweet.Images))
}
v := tweet.Videos[0]
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", v.RemoteURL)
assert.False(v.IsGif)
}
// TestParseTweetWithGif loads a tweet fixture through load_tweet_from_file and
// checks that it has exactly one video with the expected remote URL that is
// flagged as a gif.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithGif(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json")
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// `tweet.Videos[0]` below panics if the slice is empty; require.Len would stop
// the test here instead.
assert.Len(tweet.Videos, 1)
expected_video := "https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4"
if len(tweet.Videos) != 1 {
t.Errorf("Expected 1 video (a gif), but got %d instead", len(tweet.Videos))
}
if tweet.Videos[0].RemoteURL != expected_video {
t.Errorf("Expected video URL %q, but got %+v", expected_video, tweet.Videos)
}
if tweet.Videos[0].IsGif != true {
t.Errorf("Expected video to be a gif, but it wasn't")
}
v := tweet.Videos[0]
assert.Equal("https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4", v.RemoteURL)
assert.True(v.IsGif)
}
// TestParseTweetWithUrl loads a tweet fixture through load_tweet_from_file and
// checks that it has no polls and exactly one URL with the expected text,
// domain and HasCard set.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithUrl(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json")
assert.Empty(tweet.Polls)
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// `tweet.Urls[0]` below panics if the slice is empty; require.Len would stop
// the test here instead.
assert.Len(tweet.Urls, 1)
if len(tweet.Urls) != 1 {
t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls))
}
expected_url_text := "https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/"
if tweet.Urls[0].Text != expected_url_text {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
}
if !tweet.Urls[0].HasCard {
t.Errorf("Expected it to have a card, but it doesn't")
}
expected_url_domain := "reason.com"
if tweet.Urls[0].Domain != expected_url_domain {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
u := tweet.Urls[0]
assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text)
assert.True(u.HasCard)
assert.Equal("reason.com", u.Domain)
}
// TestParseTweetWithUrlButNoCard loads a tweet fixture through
// load_tweet_from_file and checks that it has exactly one URL with the expected
// text and HasCard unset.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithUrlButNoCard(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json")
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// `tweet.Urls[0]` below panics if the slice is empty; require.Len would stop
// the test here instead.
assert.Len(tweet.Urls, 1)
if len(tweet.Urls) != 1 {
t.Errorf("Expected %d urls, but got %d", 1, len(tweet.Urls))
}
expected_url_text := "https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364"
if tweet.Urls[0].Text != expected_url_text {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_text, tweet.Urls[0].Text)
}
if tweet.Urls[0].HasCard {
t.Errorf("Expected url not to have a card, but it thinks it has one")
}
u := tweet.Urls[0]
assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text)
assert.False(u.HasCard)
}
// TestParseTweetWithMultipleUrls loads a tweet fixture through
// load_tweet_from_file and checks that it has no polls and three URLs, of which
// only the third has a card, with the expected card title.
// NOTE(review): the removed `t.Errorf` checks and the `assert.*` calls that
// replace them are interleaved in this span.
func TestParseTweetWithMultipleUrls(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json")
assert.Empty(tweet.Polls)
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// the `tweet.Urls[0..2]` accesses below panic if fewer than three URLs were
// parsed; require.Len would stop the test here instead.
assert.Len(tweet.Urls, 3)
if len(tweet.Urls) != 3 {
t.Errorf("Expected %d urls, got %d instead", 3, len(tweet.Urls))
}
if tweet.Urls[0].HasCard {
t.Errorf("Expected url not to have a card, but it does: %d", 0)
}
if tweet.Urls[1].HasCard {
t.Errorf("Expected url not to have a card, but it does: %d", 1)
}
if !tweet.Urls[2].HasCard {
t.Errorf("Expected url to have a card, but it doesn't: %d", 2)
}
expected_title := "Bidens victory came from the suburbs"
if tweet.Urls[2].Title != expected_title {
t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title)
}
assert.False(tweet.Urls[0].HasCard)
assert.False(tweet.Urls[1].HasCard)
assert.True (tweet.Urls[2].HasCard)
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title)
}
// TestTweetWithLotsOfReplyMentions loads a tweet fixture through
// load_tweet_from_file and checks that its four reply-mentions match the
// expected handles in order.
// NOTE(review): the removed loop with `t.Errorf` and the `assert.Equal` loop that
// replaces it are interleaved in this span and share one closing brace.
func TestTweetWithLotsOfReplyMentions(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")
// NOTE(review): assert.Len only records a failure and lets the test continue, so
// `tweet.ReplyMentions[i]` below panics if fewer than four mentions were
// parsed; require.Len would stop the test here instead.
assert.Len(tweet.ReplyMentions, 4)
if len(tweet.ReplyMentions) != 4 {
t.Errorf("Expected %d reply-mentions, got %d", 4, len(tweet.ReplyMentions))
}
for i, v := range []scraper.UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
if tweet.ReplyMentions[i] != v {
t.Errorf("Expected %q, got %q at position %d", v, tweet.ReplyMentions[i], i)
}
for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
assert.Equal(v, tweet.ReplyMentions[i])
}
}
// TestTweetWithPoll loads a tweet fixture through load_tweet_from_file and
// checks that it carries exactly one poll whose TweetID matches the tweet and
// whose choices, vote counts, duration and timestamps match the fixture.
// NOTE(review): the removed `t.Errorf`/`t.Fatalf` checks and the `assert.*` calls
// that replace them are interleaved in this span.
func TestTweetWithPoll(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")
// NOTE(review): the pre-refactor guard below used t.Fatalf, which stopped the
// test; assert.Len does not, so `tweet.Polls[0]` panics if no poll was parsed.
// require.Len would preserve the old fail-fast behavior.
assert.Len(tweet.Polls, 1)
if len(tweet.Polls) != 1 {
t.Fatalf("Expected there to be 1 poll, but there was %d", len(tweet.Polls))
}
p := tweet.Polls[0]
if p.TweetID != tweet.ID {
t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", p.TweetID, tweet.ID)
}
if p.NumChoices != 4 {
t.Errorf("Expected %d choices, got %d instead", 4, p.NumChoices)
}
expected_choice1 := "Tribal armband"
if p.Choice1 != expected_choice1 {
t.Errorf("Expected choice1 %q, got %q", expected_choice1, p.Choice1)
}
expected_choice2 := "Marijuana leaf"
if p.Choice2 != expected_choice2 {
t.Errorf("Expected choice2 %q, got %q", expected_choice2, p.Choice2)
}
expected_choice3 := "Butterfly"
if p.Choice3 != expected_choice3 {
t.Errorf("Expected choice3 %q, got %q", expected_choice3, p.Choice3)
}
expected_choice4 := "Maple leaf"
if p.Choice4 != expected_choice4 {
t.Errorf("Expected choice4 %q, got %q", expected_choice4, p.Choice4)
}
expected_votes1 := 1593
expected_votes2 := 624
expected_votes3 := 778
expected_votes4 := 1138
if p.Choice1_Votes != expected_votes1 {
t.Errorf("Expected Choice1_Votes %d, got %d", expected_votes1, p.Choice1_Votes)
}
if p.Choice2_Votes != expected_votes2 {
t.Errorf("Expected Choice2_Votes %d, got %d", expected_votes2, p.Choice2_Votes)
}
if p.Choice3_Votes != expected_votes3 {
t.Errorf("Expected Choice3_Votes %d, got %d", expected_votes3, p.Choice3_Votes)
}
if p.Choice4_Votes != expected_votes4 {
t.Errorf("Expected Choice4_Votes %d, got %d", expected_votes4, p.Choice4_Votes)
}
expected_duration := 1440 * 60
if p.VotingDuration != expected_duration {
t.Errorf("Expected voting duration %d seconds, got %d", expected_duration, p.VotingDuration)
}
expected_ends_at := int64(1638331934)
if p.VotingEndsAt.Unix() != expected_ends_at {
t.Errorf("Expected voting ends at %d (unix), got %d", expected_ends_at, p.VotingEndsAt.Unix())
}
expected_last_updated_at := int64(1638331935)
if p.LastUpdatedAt.Unix() != expected_last_updated_at {
t.Errorf("Expected updated %d, got %d", expected_last_updated_at, p.LastUpdatedAt.Unix())
}
assert.Equal(tweet.ID, p.TweetID)
assert.Equal(4, p.NumChoices)
assert.Equal("Tribal armband", p.Choice1)
assert.Equal("Marijuana leaf", p.Choice2)
assert.Equal("Butterfly", p.Choice3)
assert.Equal("Maple leaf", p.Choice4)
assert.Equal(1593, p.Choice1_Votes)
assert.Equal(624, p.Choice2_Votes)
assert.Equal(778, p.Choice3_Votes)
assert.Equal(1138, p.Choice4_Votes)
// 1440 * 60: 1440 minutes expressed in seconds (the removed message above
// states the unit is seconds).
assert.Equal(1440 * 60, p.VotingDuration)
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
}
// TestParseTweetResponse unmarshals a saved feed response, passes it to
// ParseTweetResponse and checks how many tweets, retweets and users come back.
// NOTE(review): the removed `t.Errorf` checks and the `assert`/`require` calls
// that replace them are interleaved in this span.
func TestParseTweetResponse(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Errorf(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp)
if err != nil {
t.Errorf(err.Error())
}
tweets, retweets, users, err := ParseTweetResponse(tweet_resp)
require.NoError(t, err)
if len(tweets) != 29 - 3 {
t.Errorf("Expected %d tweets, got %d", 29-3, len(tweets))
}
if len(retweets) != 3 {
t.Errorf("Expected %d retweets, got %d", 3, len(retweets))
}
if len(users) != 9 {
t.Errorf("Expected %d users, got %d", 9, len(users))
}
// 29 - 3: presumably 29 feed entries minus the 3 retweets counted separately
// below (TODO confirm against the fixture).
assert.Len(tweets, 29 - 3)
assert.Len(retweets, 3)
assert.Len(users, 9)
}
// TestParseTweetResponseWithTombstones unmarshals a saved response, calls
// HandleTombstones and checks the number of extra users it returns, then passes
// the response to ParseTweetResponse and checks the tweet, retweet and user
// counts.
// NOTE(review): the removed `t.Errorf`/`t.Fatal` checks and the `assert`/`require`
// calls that replace them are interleaved in this span.
func TestParseTweetResponseWithTombstones(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
if err != nil {
panic(err)
}
var tweet_resp scraper.TweetResponse
var tweet_resp TweetResponse
err = json.Unmarshal(data, &tweet_resp)
if err != nil {
t.Errorf(err.Error())
}
// require (unlike assert) stops the test immediately on failure.
require.NoError(t, err)
// HandleTombstones is called before parsing; its return value is the list of
// extra users checked below.
extra_users := tweet_resp.HandleTombstones()
if len(extra_users) != 1 {
t.Errorf("Expected to need 1 extra user but got %d instead", len(extra_users))
}
assert.Len(extra_users, 1)
tweets, retweets, users, err := scraper.ParseTweetResponse(tweet_resp)
if err != nil {
t.Fatal(err)
}
tweets, retweets, users, err := ParseTweetResponse(tweet_resp)
require.NoError(t, err)
if len(tweets) != 2 {
t.Errorf("Expected %d tweets, got %d", 2, len(tweets))
}
if len(retweets) != 0 {
t.Errorf("Expected %d retweets, got %d", 0, len(retweets))
}
if len(users) != 1 {
t.Errorf("Expected %d users, got %d", 1, len(users))
}
assert.Len(tweets, 2)
assert.Len(retweets, 0)
assert.Len(users, 1)
}

View File

@ -5,182 +5,92 @@ import (
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
/**
* Loads the fixture "test_responses/tweet_content/url_card.json" into an APICard and checks
* the fields of the value returned by ParseAPIUrlCard: domain, title, description, thumbnail
* size / remote URL / local path, creator and site IDs, and the thumbnail / download flags.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, `expected_*` locals with hand-written `if` / `t.Errorf` checks) with their
* post-refactor replacements (dot-imported names, testify `assert` / `require` calls), so
* every check shows up twice.
*/
func TestParseAPIUrlCard(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
require.NoError(t, err)
expected_domain := "reason.com"
if url.Domain != expected_domain {
t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
}
expected_title := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'"
if url.Title != expected_title {
t.Errorf("Expected %q, got %q", expected_title, url.Title)
}
expected_description := "\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\""
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_width := 600
if url.ThumbnailWidth != expected_width {
t.Errorf("Expected thumbnail width %d, got %d", expected_width, url.ThumbnailWidth)
}
expected_height := 315
if url.ThumbnailHeight != expected_height {
t.Errorf("Expected thumbnail height %d, got %d", expected_height, url.ThumbnailHeight)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "odDi9EqO_600x600.jpg"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_creator_id := scraper.UserID(155581583)
if url.CreatorID != expected_creator_id {
t.Errorf("Expected %d, got %d", expected_creator_id, url.CreatorID)
}
expected_site_id := scraper.UserID(16467567)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
// testify form of the same expectations as the hand-written checks above
url := ParseAPIUrlCard(apiCard)
assert.Equal("reason.com", url.Domain)
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\"", url.Description)
assert.Equal(600, url.ThumbnailWidth)
assert.Equal(315, url.ThumbnailHeight)
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(155581583), url.CreatorID)
assert.Equal(UserID(16467567), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
/**
* Loads the fixture "test_responses/tweet_content/url_card_with_player.json" into an APICard
* and checks the fields of the value returned by ParseAPIUrlCard: domain, title, description,
* thumbnail remote URL / local path, site ID, and the thumbnail / download flags.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, `expected_*` locals with hand-written `if` / `t.Errorf` checks) with their
* post-refactor replacements (dot-imported names, testify `assert` / `require` calls), so
* every check shows up twice.
*/
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
require.NoError(t, err)
expected_domain := "www.youtube.com"
if url.Domain != expected_domain {
t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
}
expected_title := "The Politically Incorrect Guide to the Constitution (Starring Tom..."
if url.Title != expected_title {
t.Errorf("Expected %q, got %q", expected_title, url.Title)
}
expected_description := "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..."
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_remote_url := "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "_1t0btyt_800x320_1.jpg"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_site_id := scraper.UserID(10228272)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
// testify form of the same expectations as the hand-written checks above
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble...", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
/**
* Loads the fixture "test_responses/tweet_content/url_card_with_player_placeholder_image.json"
* into an APICard and checks the fields of the value returned by ParseAPIUrlCard.  The expected
* thumbnail here is "https://pbs.twimg.com/cards/player-placeholder.png" with local path
* "player-placeholder.png", and HasThumbnail is still expected to be true.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, `expected_*` locals with hand-written `if` / `t.Errorf` checks) with their
* post-refactor replacements (dot-imported names, testify `assert` / `require` calls), so
* every check shows up twice.
*/
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
require.NoError(t, err)
expected_domain := "www.youtube.com"
if url.Domain != expected_domain {
t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
}
expected_title := "Did Michael Malice Turn Me into an Anarchist? | Ep 181"
if url.Title != expected_title {
t.Errorf("Expected %q, got %q", expected_title, url.Title)
}
expected_description := "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..."
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
expected_remote_url := "https://pbs.twimg.com/cards/player-placeholder.png"
if url.ThumbnailRemoteUrl != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
}
expected_local_filename := "player-placeholder.png"
if url.ThumbnailLocalPath != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
}
expected_site_id := scraper.UserID(10228272)
if url.SiteID != expected_site_id {
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
}
if !url.HasThumbnail {
t.Errorf("Should have a thumbnail, but it doesn't")
}
if url.IsContentDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
// testify form of the same expectations as the hand-written checks above
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________...", url.Description)
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
/**
* Loads the fixture "test_responses/tweet_content/url_card_without_thumbnail.json" into an
* APICard and checks that ParseAPIUrlCard returns the expected domain and title, an empty
* description, HasCard set and HasThumbnail unset.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, `expected_*` locals with hand-written `if` / `t.Errorf` checks) with their
* post-refactor replacements (dot-imported names, testify `assert` / `require` calls), so
* every check shows up twice.
*/
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var apiCard scraper.APICard
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
if err != nil {
t.Fatal(err.Error())
}
url := scraper.ParseAPIUrlCard(apiCard)
require.NoError(t, err)
expected_domain := "en.m.wikipedia.org"
if url.Domain != expected_domain {
t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
}
expected_title := "Entryism - Wikipedia"
if url.Title != expected_title {
t.Errorf("Expected %q, got %q", expected_title, url.Title)
}
expected_description := ""
if url.Description != expected_description {
t.Errorf("Expected %q, got %q", expected_description, url.Description)
}
if !url.HasCard {
t.Errorf("Expected it to have a card, but it didn't")
}
if url.HasThumbnail {
t.Errorf("Should have no thumbnail, but it does")
}
// testify form of the same expectations as the hand-written checks above
url := ParseAPIUrlCard(apiCard)
assert.Equal("en.m.wikipedia.org", url.Domain)
assert.Equal("Entryism - Wikipedia", url.Title)
assert.Equal("", url.Description)
assert.True(url.HasCard)
assert.False(url.HasThumbnail)
}

View File

@ -7,162 +7,96 @@ import (
"net/http"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"offline_twitter/scraper"
. "offline_twitter/scraper"
)
/**
* Loads the fixture "test_responses/michael_malice_user_profile.json" into a UserResponse,
* converts it with ConvertToAPIUser, parses it with ParseSingleUser and checks the resulting
* user's ID, names, bio, counts, location, website, join date, flags, image URLs / local
* paths and pinned tweet ID.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, hand-written `if` / `t.Errorf` checks) with their post-refactor replacements
* (dot-imported names, testify `assert` / `require` calls), so every check shows up twice.
*/
func TestParseSingleUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/michael_malice_user_profile.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var user_resp scraper.UserResponse
var user_resp UserResponse
err = json.Unmarshal(data, &user_resp)
if err != nil {
t.Errorf(err.Error())
}
require.NoError(t, err)
apiUser := user_resp.ConvertToAPIUser()
user, err := scraper.ParseSingleUser(apiUser)
if err != nil {
t.Errorf(err.Error())
}
user, err := ParseSingleUser(apiUser)
require.NoError(t, err)
expected_id := 44067298
if user.ID != scraper.UserID(expected_id) {
t.Errorf("Expected %q, got %q", expected_id, user.ID)
}
if user.DisplayName != "Michael Malice" {
t.Errorf("Expected %q, got %q", "Michael Malice", user.DisplayName)
}
if user.Handle != "michaelmalice" {
t.Errorf("Expected %q, got %q", "michaelmalice", user.Handle)
}
expectedBio := "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑"
if user.Bio != expectedBio {
t.Errorf("Expected %q, got %q", expectedBio, user.Bio)
}
if user.FollowingCount != 941 {
t.Errorf("Expected %d, got %d", 941, user.FollowingCount)
}
// NOTE(review): the failure message below reports 941 as the expected value although the
// condition compares against 208589
if user.FollowersCount != 208589 {
t.Errorf("Expected %d, got %d", 941, user.FollowersCount)
}
if user.Location != "Brooklyn" {
t.Errorf("Expected %q, got %q", "Brooklyn", user.Location)
}
if user.Website != "https://amzn.to/3oInafv" {
t.Errorf("Expected %q, got %q", "https://amzn.to/3oInafv", user.Website)
}
if user.JoinDate.Unix() != 1243920952 {
t.Errorf("Expected %d, got %d", 1243920952, user.JoinDate.Unix())
}
if user.IsPrivate != false {
t.Errorf("Expected %v, got %v", false, user.IsPrivate)
}
// NOTE(review): the failure message below prints user.IsPrivate although the condition
// checks user.IsVerified
if user.IsVerified != true {
t.Errorf("Expected %v, got %v", true, user.IsPrivate)
}
if user.IsBanned != false {
t.Errorf("User should not be banned")
}
expectedProfileImage := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg"
if user.ProfileImageUrl != expectedProfileImage {
t.Errorf("Expected %q, got %q", expectedProfileImage, user.ProfileImageUrl)
}
expected_tiny_profile_image := "https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg"
if user.GetTinyProfileImageUrl() != expected_tiny_profile_image {
t.Errorf("Expected %q, got %q", expected_tiny_profile_image, user.GetTinyProfileImageUrl())
}
expectedBannerImage := "https://pbs.twimg.com/profile_banners/44067298/1615134676"
if user.BannerImageUrl != expectedBannerImage {
t.Errorf("Expected %q, got %q", expectedBannerImage, user.BannerImageUrl)
}
expected_profile_image_local := "michaelmalice_profile_Lbwdb_C9.jpg"
if user.ProfileImageLocalPath != expected_profile_image_local {
t.Errorf("Expected %q, got %q", expected_profile_image_local, user.ProfileImageLocalPath)
}
expected_banner_image_local := "michaelmalice_banner_1615134676.jpg"
if user.BannerImageLocalPath != expected_banner_image_local {
t.Errorf("Expected %q, got %q", expected_banner_image_local, user.BannerImageLocalPath)
}
// NOTE(review): the failure message below prints user.PinnedTweet although the condition
// checks user.PinnedTweetID
expected_id = 1403835414373339136
if user.PinnedTweetID != scraper.TweetID(expected_id) {
t.Errorf("Expected %q, got %q", expected_id, user.PinnedTweet)
}
// testify form of the same expectations as the hand-written checks above
assert.Equal(UserID(44067298), user.ID)
assert.Equal("Michael Malice", user.DisplayName)
assert.Equal(UserHandle("michaelmalice"), user.Handle)
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
assert.Equal(941, user.FollowingCount)
assert.Equal(208589, user.FollowersCount)
assert.Equal("Brooklyn", user.Location)
assert.Equal("https://amzn.to/3oInafv", user.Website)
assert.Equal(int64(1243920952), user.JoinDate.Unix())
assert.False(user.IsPrivate)
assert.True (user.IsVerified)
assert.False(user.IsBanned)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
assert.Equal("https://pbs.twimg.com/profile_banners/44067298/1615134676", user.BannerImageUrl)
assert.Equal("michaelmalice_profile_Lbwdb_C9.jpg", user.ProfileImageLocalPath)
assert.Equal("michaelmalice_banner_1615134676.jpg", user.BannerImageLocalPath)
assert.Equal(TweetID(1403835414373339136), user.PinnedTweetID)
}
/**
* Should correctly parse a banned user.
*
* Loads the fixture "test_responses/suspended_user.json" into a UserResponse, converts it with
* ConvertToAPIUser, parses it with ParseSingleUser, and checks the user ID, the IsBanned flag
* and the tiny profile image URL / local path reported for the banned user.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, hand-written `if` / `t.Errorf` checks) with their post-refactor replacements
* (dot-imported names, testify `assert` / `require` calls), so every check shows up twice.
*/
func TestParseBannedUser(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/suspended_user.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var user_resp scraper.UserResponse
var user_resp UserResponse
err = json.Unmarshal(data, &user_resp)
if err != nil {
t.Fatalf(err.Error())
}
require.NoError(t, err)
apiUser := user_resp.ConvertToAPIUser()
user, err := scraper.ParseSingleUser(apiUser)
if err != nil {
t.Fatalf(err.Error())
}
if user.ID != 193918550 {
t.Errorf("Expected id %d, got %d", 193918550, user.ID)
}
if user.IsBanned != true {
t.Errorf("Expected user to be banned")
}
user, err := ParseSingleUser(apiUser)
require.NoError(t, err)
assert.Equal(UserID(193918550), user.ID)
assert.True(user.IsBanned)
// Test generation of profile images for banned user
if user.GetTinyProfileImageUrl() != "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" {
t.Errorf("Incorrect tiny profile image URL for banned user: %q", user.GetTinyProfileImageUrl())
}
if user.GetTinyProfileImageLocalPath() != "default_profile.png" {
t.Errorf("Incorrect tiny profile image local path for banned user: %q", user.GetTinyProfileImageLocalPath())
}
assert.Equal("https://abs.twimg.com/sticky/default_profile_images/default_profile.png", user.GetTinyProfileImageUrl())
assert.Equal("default_profile.png", user.GetTinyProfileImageLocalPath())
}
/**
* Should extract a user handle from a tweet URL, or fail if URL is invalid.
*
* Covers four inputs to ParseHandleFromTweetUrl: a plain tweet URL, a tweet URL with GET
* params, a malformed tweet URL, and the empty string.  The first two must yield the expected
* handle without an error; the last two must yield an error.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, hand-written `if` / `t.Errorf` checks) with their post-refactor replacements
* (dot-imported names, testify `assert` calls), so every check shows up twice.
*/
func TestParseHandleFromTweetUrl(t *testing.T) {
assert := assert.New(t)
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
expected_user := scraper.UserHandle("kanesays23")
result, err := scraper.ParseHandleFromTweetUrl(url)
if err != nil {
t.Errorf("Unexpected error while parsing handle: %s", err)
}
if result != expected_user {
t.Errorf("Expected handle %q, got %q", expected_user, result)
}
result, err := ParseHandleFromTweetUrl(url)
assert.NoError(err)
assert.Equal(UserHandle("kanesays23"), result)
// Test url with GET params
result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
expected_user = scraper.UserHandle("NerdNoticing")
if err != nil {
t.Errorf("Unexpected error while parsing handle: %s", err)
}
if result != expected_user {
t.Errorf("Expected handle %q, got %q", expected_user, result)
}
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.NoError(err)
assert.Equal(UserHandle("NerdNoticing"), result)
// Test invalid url
// (this input differs from the previous one only in the missing "?" before "s=20")
_, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
if err == nil {
t.Errorf("Should have produced an error for invalid URL")
}
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.Error(err)
// Test empty string
_, err = scraper.ParseHandleFromTweetUrl("")
if err == nil {
t.Errorf("Should have produced an error for invalid URL")
}
_, err = ParseHandleFromTweetUrl("")
assert.Error(err)
}
@ -170,6 +104,8 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
* Should extract a user handle from a shortened tweet URL
*/
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
assert := assert.New(t)
short_url := "https://t.co/rZVrNGJyDe"
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
@ -183,16 +119,9 @@ func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
})
// Check the httpmock interceptor is working correctly
if scraper.ExpandShortUrl(short_url) != expanded_url {
t.Fatalf("httpmock didn't intercept the request")
}
require.Equal(t, expanded_url, ExpandShortUrl(short_url), "httpmock didn't intercept the request")
result, err := scraper.ParseHandleFromTweetUrl(short_url)
if err != nil {
t.Errorf(err.Error())
}
expected_user := scraper.UserHandle("MarkSnyderJr1")
if result != expected_user {
t.Errorf("Expected user %q, got %q", expected_user, result)
}
result, err := ParseHandleFromTweetUrl(short_url)
require.NoError(t, err)
assert.Equal(UserHandle("MarkSnyderJr1"), result)
}

View File

@ -5,65 +5,33 @@ import (
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
/**
* Loads the fixture "test_responses/tweet_content/video.json" into an APIExtendedMedia, parses
* it with ParseAPIVideo for TweetID(28), and checks the resulting video's ID, tweet ID,
* dimensions, remote URL, local filename, thumbnail URL / local path, view count, duration
* and download flag.
*
* NOTE(review): this span appears to interleave the pre-refactor lines (`scraper.`-qualified
* names, `expected_*` locals with hand-written `if` / `t.Errorf` checks) with their
* post-refactor replacements (dot-imported names, testify `assert` / `require` calls), so
* every check shows up twice.
*/
func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t)
data, err := ioutil.ReadFile("test_responses/tweet_content/video.json")
if err != nil {
// The fixture could not be read: abort with a panic instead of reporting through `t`
panic(err)
}
var apivideo scraper.APIExtendedMedia
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
if err != nil {
t.Fatal(err.Error())
}
tweet_id := scraper.TweetID(28)
video := scraper.ParseAPIVideo(apivideo, tweet_id)
require.NoError(t, err)
expected_id := 1418951950020845568
if video.ID != scraper.VideoID(expected_id) {
t.Errorf("Expected ID of %d, got %d", expected_id, video.ID)
}
if video.TweetID != tweet_id {
t.Errorf("Expected ID of %d, got %d", tweet_id, video.TweetID)
}
expected_height := 1280
if video.Height != expected_height {
t.Errorf("Expected height %d, got %d", expected_height, video.Height)
}
expected_width := 720
if video.Width != expected_width {
t.Errorf("Expected width %d, got %d", expected_width, video.Width)
}
expected_remote_url := "https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12"
if video.RemoteURL != expected_remote_url {
t.Errorf("Expected %q, got %q", expected_remote_url, video.RemoteURL)
}
// The expected local filename "28.mp4" matches the tweet ID (28) passed to ParseAPIVideo
expected_local_filename := "28.mp4"
if video.LocalFilename != expected_local_filename {
t.Errorf("Expected %q, got %q", expected_local_filename, video.LocalFilename)
}
expected_thumbnail_url := "https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailRemoteUrl != expected_thumbnail_url {
t.Errorf("Expected %q, got %q", expected_thumbnail_url, video.ThumbnailRemoteUrl)
}
expected_thumbnail_filename := "eUTaYYfuAJ8FyjUi.jpg"
if video.ThumbnailLocalPath != expected_thumbnail_filename {
t.Errorf("Expected %q, got %q", expected_thumbnail_filename, video.ThumbnailLocalPath)
}
expected_view_count := 275952
if video.ViewCount != expected_view_count {
t.Errorf("Expected view count %d, got %d", expected_view_count, video.ViewCount)
}
// NOTE(review): the unit of Duration is not visible here (88300 is presumably milliseconds) -- confirm
expected_duration := 88300
if video.Duration != expected_duration {
t.Errorf("Expected duration %d, got %d", expected_duration, video.Duration)
}
if video.IsDownloaded {
t.Errorf("Expected it not to be downloaded, but it was")
}
// testify form of the same expectations as the hand-written checks above
tweet_id := TweetID(28)
video := ParseAPIVideo(apivideo, tweet_id)
assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(tweet_id, video.TweetID)
assert.Equal(1280, video.Height)
assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("28.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded)
}