offline-twitter/pkg/scraper/tweet_test.go
Alessio 81e6dc50be REFACTOR: rename some stuff in scraper apiv1
- rename TweetResponse -> APIv1Response
- rename Entry -> APIv1Entry
- rename APIv1Response.GetCursor -> GetCursorBottom
2024-12-23 12:37:28 -08:00

258 lines
8.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package scraper_test
import (
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// load_tweet_from_file reads the JSON fixture at `filename`, decodes it into an APITweet and
// converts the result with ParseSingleTweet.
//
// It panics if the file cannot be read, the JSON cannot be decoded, or ParseSingleTweet
// returns an error.
func load_tweet_from_file(filename string) Tweet {
	raw, err := os.ReadFile(filename)
	if err != nil {
		panic(err)
	}

	var decoded APITweet
	if err := json.Unmarshal(raw, &decoded); err != nil {
		panic(err)
	}

	parsed, err := ParseSingleTweet(decoded)
	if err != nil {
		panic(err)
	}
	return parsed
}
// TestParseSingleTweet parses the "tweet_with_unicode_chars" fixture and verifies its text,
// its single mention, its posting timestamp, and that it carries no URLs, quoted tweet or polls.
func TestParseSingleTweet(t *testing.T) {
	check := assert.New(t)
	parsed := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")

	// NOTE(review): the fixture name suggests non-ASCII characters in the tweet text, but the
	// expected string below is plain ASCII (e.g. "Ive") -- confirm this matches the fixture.
	expected_text := "The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the " +
		"largest white pill Ive swallowed in years."
	check.Equal(expected_text, parsed.Text)

	// Exactly one mention, and it is the expected handle
	check.Len(parsed.Mentions, 1)
	check.Contains(parsed.Mentions, "michaelmalice")

	check.Empty(parsed.Urls)
	check.Equal(int64(1621639105), parsed.PostedAt.Unix())
	check.Zero(parsed.QuotedTweetID)
	check.Empty(parsed.Polls)
}
// TestParseTweetWithImage parses the "tweet_with_image" fixture and verifies the tweet text
// and that exactly one image was attached to the parsed tweet.
func TestParseTweetWithImage(t *testing.T) {
	check := assert.New(t)
	parsed := load_tweet_from_file("test_responses/single_tweets/tweet_with_image.json")

	check.Equal("this saddens me every time", parsed.Text)
	check.Len(parsed.Images, 1)
}
// TestParseTweetWithQuotedTweetAsLink ensures the fake url (the link to the quoted tweet) is not
// parsed as a URL; it should just be ignored.  The quoted tweet and reply-to IDs must still be set.
func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
	check := assert.New(t)
	parsed := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_as_link2.json")

	check.Equal("sometimes they're too dimwitted to even get the wrong title right", parsed.Text)

	// Relationships to other tweets
	check.Equal(TweetID(1395882872729477131), parsed.InReplyToID)
	check.Equal(TweetID(1396194494710788100), parsed.QuotedTweetID)

	// Nothing else should have been extracted from this tweet
	check.Empty(parsed.ReplyMentions)
	check.Empty(parsed.Polls)
	check.Empty(parsed.Urls)
}
// TestParseTweetWithQuotedTweetAndLink checks that quote-tweets with links work properly: the
// quoted tweet ID is set, and the real link is still parsed into tweet.Urls.
func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
	assert.Equal("This is video hes talking about. Please watch. Is there a single US politician capable of doing this with the "+
		"weasels and rats running American industry today?", tweet.Text)
	assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)

	// Stop the test on a length mismatch; indexing below would otherwise panic with
	// index-out-of-range instead of reporting a normal test failure.
	require.Len(t, tweet.Urls, 1)
	url := tweet.Urls[0]
	// testify's argument order is (expected, actual)
	assert.Equal("https://youtu.be/VjrlTMvirVo", url.Text)
}
// TestParseTweetWithVideo parses the "tweet_with_video" fixture and verifies that it yields no
// images and exactly one video, which has the expected remote URL and is not flagged as a GIF.
func TestParseTweetWithVideo(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_video.json")
	assert.Empty(tweet.Images)

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Videos, 1)
	v := tweet.Videos[0]
	assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", v.RemoteURL)
	assert.False(v.IsGif)
}
// TestParseTweetWith2Videos parses the "tweet_with_2_videos" fixture and verifies that both videos
// are extracted, each with its expected remote URL, local filename and thumbnail local path.
func TestParseTweetWith2Videos(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_2_videos.json")
	assert.Empty(tweet.Images)

	// Stop the test on a length mismatch rather than panicking on the indexes below
	require.Len(t, tweet.Videos, 2)

	v1 := tweet.Videos[0]
	assert.Equal("https://video.twimg.com/ext_tw_video/1579701730148847617/pu/vid/576x576/ghA0fyf58v-2naWR.mp4?tag=12", v1.RemoteURL)
	assert.False(v1.IsGif)
	assert.Equal("gh/ghA0fyf58v-2naWR.mp4", v1.LocalFilename)
	assert.Equal("xU/xUlghaCXbPOVN7vI.jpg", v1.ThumbnailLocalPath)

	v2 := tweet.Videos[1]
	assert.Equal("https://video.twimg.com/ext_tw_video/1579701730157252608/pu/vid/480x480/VQ69Ut84XT2BgIzX.mp4?tag=12", v2.RemoteURL)
	assert.False(v2.IsGif)
	assert.Equal("VQ/VQ69Ut84XT2BgIzX.mp4", v2.LocalFilename)
	assert.Equal("dY/dYN55HDytKvM1Bi8.jpg", v2.ThumbnailLocalPath)
}
// TestParseTweetWithImageAndVideo parses the "tweet_with_image_and_video" fixture and verifies
// that one image and one video are extracted, with the expected IDs and remote URLs.
func TestParseTweetWithImageAndVideo(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_image_and_video.json")

	// Stop the test on a length mismatch rather than panicking on the indexes below
	require.Len(t, tweet.Images, 1)
	require.Len(t, tweet.Videos, 1)

	// testify's argument order is (expected, actual)
	img := tweet.Images[0]
	assert.Equal(ImageID(1579292192580911104), img.ID)
	assert.Equal("https://pbs.twimg.com/media/FerF4bdVQAAKeYJ.jpg", img.RemoteURL)

	vid := tweet.Videos[0]
	assert.Equal(VideoID(1579292197752430592), vid.ID)
	assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1579292197752430592/pu/img/soG4wMWOy3AVpllM.jpg", vid.ThumbnailRemoteUrl)
	assert.Equal("https://video.twimg.com/ext_tw_video/1579292197752430592/pu/vid/640x750/UE-PSqG2EE5N2dN8.mp4?tag=12", vid.RemoteURL)
}
// TestParseTweetWithGif parses the "tweet_that_is_a_reply_with_gif" fixture and verifies that the
// GIF is extracted as a single video entry with IsGif set.
func TestParseTweetWithGif(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json")

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Videos, 1)
	v := tweet.Videos[0]
	assert.Equal("https://video.twimg.com/tweet_video/E189-VhVoAYcrDv.mp4", v.RemoteURL)
	assert.True(v.IsGif)
}
// TestParseTweetWithUrl parses the "tweet_with_url_card" fixture and verifies that a single URL
// is extracted with its full text, short text, card flag and domain, and that no poll is created.
func TestParseTweetWithUrl(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_card.json")
	assert.Empty(tweet.Polls)

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Urls, 1)
	u := tweet.Urls[0]
	assert.Equal("https://reason.com/2021/08/30/la-teachers-union-cecily-myart-cruz-learning-loss/", u.Text)
	assert.Equal("https://t.co/Y1lWjNEiPK", u.ShortText)
	assert.True(u.HasCard)
	assert.Equal("reason.com", u.Domain)
}
// TestParseTweetWithUrlButNoCard parses the "tweet_with_url_but_no_card" fixture and verifies that
// the URL is still extracted (full and short text) while HasCard is false.
func TestParseTweetWithUrlButNoCard(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_url_but_no_card.json")

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Urls, 1)
	u := tweet.Urls[0]
	assert.Equal("https://www.politico.com/newsletters/west-wing-playbook/2021/09/16/the-jennifer-rubin-wh-symbiosis-494364", u.Text)
	assert.Equal("https://t.co/ZigZyLctwt", u.ShortText)
	assert.False(u.HasCard)
}
// TestParseTweetWithMultipleUrls parses the "tweet_with_multiple_urls" fixture and verifies that
// three URLs are extracted, of which only the last has a card (with the expected title).
func TestParseTweetWithMultipleUrls(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_multiple_urls.json")
	assert.Empty(tweet.Polls)

	// Stop the test on a length mismatch rather than panicking on the indexes below
	require.Len(t, tweet.Urls, 3)
	assert.False(tweet.Urls[0].HasCard)
	assert.False(tweet.Urls[1].HasCard)
	assert.True(tweet.Urls[2].HasCard)
	assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title)
}
// TestTweetWithLotsOfReplyMentions parses the "tweet_with_at_mentions_in_front" fixture and
// verifies that the four expected reply-mentions are extracted, in order.
func TestTweetWithLotsOfReplyMentions(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")

	expected := []string{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"}

	// Stop the test on a length mismatch; the loop below indexes ReplyMentions by position
	// and would panic if fewer entries were parsed.
	require.Len(t, tweet.ReplyMentions, len(expected))
	for i, v := range expected {
		assert.Equal(v, tweet.ReplyMentions[i])
	}
}
// TestTweetWithPoll parses the "tweet_with_poll_4_choices" fixture and verifies the single poll:
// its owning tweet ID, the four choices with their vote counts, and its timing fields.
func TestTweetWithPoll(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Polls, 1)
	p := tweet.Polls[0]
	assert.Equal(tweet.ID, p.TweetID)

	// Choices
	assert.Equal(4, p.NumChoices)
	assert.Equal("Tribal armband", p.Choice1)
	assert.Equal("Marijuana leaf", p.Choice2)
	assert.Equal("Butterfly", p.Choice3)
	assert.Equal("Maple leaf", p.Choice4)

	// Vote counts
	assert.Equal(1593, p.Choice1_Votes)
	assert.Equal(624, p.Choice2_Votes)
	assert.Equal(778, p.Choice3_Votes)
	assert.Equal(1138, p.Choice4_Votes)

	// Timing.  NOTE(review): 1440*60 is presumably 1440 minutes expressed in seconds -- confirm
	assert.Equal(1440*60, p.VotingDuration)
	assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
	assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
}
// TestTweetWithSpace parses the "tweet_with_space_card" fixture and verifies that the card is
// extracted as a Space (with the expected ID and short URL, details not yet fetched) rather
// than as a URL.
func TestTweetWithSpace(t *testing.T) {
	assert := assert.New(t)
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_space_card.json")
	assert.Empty(tweet.Urls)

	// Stop the test on a length mismatch rather than panicking on the index below
	require.Len(t, tweet.Spaces, 1)
	s := tweet.Spaces[0]
	assert.Equal(SpaceID("1YpKkZVyQjoxj"), s.ID)
	assert.Equal("https://t.co/WBPAHNF8Om", s.ShortUrl)
	assert.False(s.IsDetailsFetched)
}
// TestParseAPIv1Response decodes the "michael_malice_feed" fixture into an APIv1Response, converts
// it with ToTweetTrove, and verifies how many tweets, retweets and users end up in the trove.
func TestParseAPIv1Response(t *testing.T) {
	assert := assert.New(t)

	// Report a missing or unreadable fixture as a failure of this test rather than panicking,
	// which would abort the whole test binary.
	data, err := os.ReadFile("test_responses/michael_malice_feed.json")
	require.NoError(t, err)

	var tweet_resp APIv1Response
	err = json.Unmarshal(data, &tweet_resp)
	require.NoError(t, err)

	trove, err := tweet_resp.ToTweetTrove()
	require.NoError(t, err)

	// NOTE(review): "29-3" presumably means 29 feed items minus the 3 retweets counted on the
	// next line -- confirm against the fixture.
	assert.Len(trove.Tweets, 29-3)
	assert.Len(trove.Retweets, 3)
	assert.Len(trove.Users, 9)
}
// TestParseAPIv1ResponseWithTombstones decodes the "tombstone_deleted" fixture, runs
// HandleTombstones on it (expecting one extra user to be returned), then converts the response
// with ToTweetTrove and verifies the resulting tweet, retweet and user counts.
func TestParseAPIv1ResponseWithTombstones(t *testing.T) {
	assert := assert.New(t)

	// Report a missing or unreadable fixture as a failure of this test rather than panicking,
	// which would abort the whole test binary.
	data, err := os.ReadFile("test_responses/tombstones/tombstone_deleted.json")
	require.NoError(t, err)

	var tweet_resp APIv1Response
	err = json.Unmarshal(data, &tweet_resp)
	require.NoError(t, err)

	// HandleTombstones is called before ToTweetTrove, as in the original test
	extra_users := tweet_resp.HandleTombstones()
	assert.Len(extra_users, 1)

	trove, err := tweet_resp.ToTweetTrove()
	require.NoError(t, err)

	assert.Len(trove.Tweets, 2)
	assert.Empty(trove.Retweets)
	assert.Len(trove.Users, 1)
}