REFACTOR: reorganize the scraper so that related types, parsers and tests are grouped together

- delete type-specific test files and put them in api_types_test
- remove an unneeded test file
This commit is contained in:
Alessio 2024-12-23 19:06:29 -08:00
parent 32531a3bd9
commit bd0e353771
8 changed files with 399 additions and 424 deletions

View File

@ -14,6 +14,10 @@ import (
"time"
)
// -------------------------------------------------------------------------
// Image content
// -------------------------------------------------------------------------
type APIMedia struct {
ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"`
@ -38,6 +42,10 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
}
}
// -------------------------------------------------------------------------
// Video content
// -------------------------------------------------------------------------
type Variant struct {
Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"`
@ -66,6 +74,60 @@ type APIExtendedMedia struct {
URL string `json:"url"` // For DM videos
}
// ParseAPIVideo converts an APIExtendedMedia item into a Video.
//
// The variant with the highest bitrate supplies the remote URL, and the local filename is derived
// from the base name of that URL's path.  The view count is read from the media stats payload `R`:
// if `R` is a string the count is left at 0; if it is an object, the count is parsed from the
// string at `R["ok"]["viewCount"]`.
//
// Panics if there are no variants, if the stats object is missing or has malformed "ok" /
// "viewCount" entries, or if the chosen variant's URL cannot be parsed.
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
	variants := apiVideo.VideoInfo.Variants
	if len(variants) == 0 {
		panic("No video variants found!")
	}
	// Select the highest-bitrate variant without reordering the caller's slice (sorting in place
	// would modify the backing array shared with `apiVideo`'s owner).
	best_variant := slices.MaxFunc(variants, func(a, b Variant) int { return a.Bitrate - b.Bitrate })
	video_remote_url := best_variant.URL

	var view_count int
	switch r := apiVideo.Ext.MediaStats.R.(type) {
	case string:
		// No stats object available; leave the view count at 0
	case map[string]interface{}:
		ok_entry, is_ok := r["ok"]
		if !is_ok {
			panic("No 'ok' value found in the R!")
		}
		ok_map, is_ok := ok_entry.(map[string]interface{})
		if !is_ok {
			panic("The 'ok' value in the R is not an object!")
		}
		// Check for presence *before* using the value, so a missing key gives a useful message
		view_count_val, is_ok := ok_map["viewCount"]
		if !is_ok {
			panic("No 'viewCount' value found in the OK!")
		}
		view_count_str, is_ok := view_count_val.(string)
		if !is_ok {
			panic("The 'viewCount' value in the OK is not a string!")
		}
		view_count = int_or_panic(view_count_str)
	}

	video_parsed_url, err := url.Parse(video_remote_url)
	if err != nil {
		panic(err)
	}

	// Use only the URL path, so that query params (e.g., `?tag=12`) don't end up in the filename
	local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))

	return Video{
		ID:            VideoID(apiVideo.ID),
		Width:         apiVideo.OriginalInfo.Width,
		Height:        apiVideo.OriginalInfo.Height,
		RemoteURL:     video_remote_url,
		LocalFilename: local_filename,

		ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
		ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
		Duration:           apiVideo.VideoInfo.Duration,
		ViewCount:          view_count,

		IsDownloaded:    false,
		IsBlockedByDMCA: false,
		IsGeoblocked:    apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
		IsGif:           apiVideo.Type == "animated_gif",
	}
}
// -------------------------------------------------------------------------
// Cards: polls and urls
// -------------------------------------------------------------------------
type APICard struct {
Name string `json:"name"`
ShortenedUrl string `json:"url"`
@ -194,56 +256,6 @@ func parse_num_choices(card_name string) int {
return int_or_panic(card_name[4:5])
}
// ParseAPIVideo converts an APIExtendedMedia item into a Video.
//
// The variant with the highest bitrate supplies the remote URL, and the local filename is derived
// from the base name of that URL's path.  The view count is read from the media stats payload `R`:
// if `R` is a string the count is left at 0; if it is an object, the count is parsed from the
// string at `R["ok"]["viewCount"]`.
//
// Panics if there are no variants, if the stats object is missing or has malformed "ok" /
// "viewCount" entries, or if the chosen variant's URL cannot be parsed.
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
	variants := apiVideo.VideoInfo.Variants
	if len(variants) == 0 {
		panic("No video variants found!")
	}
	// Select the highest-bitrate variant without reordering the caller's slice (sorting in place
	// would modify the backing array shared with `apiVideo`'s owner).
	best_variant := slices.MaxFunc(variants, func(a, b Variant) int { return a.Bitrate - b.Bitrate })
	video_remote_url := best_variant.URL

	var view_count int
	switch r := apiVideo.Ext.MediaStats.R.(type) {
	case string:
		// No stats object available; leave the view count at 0
	case map[string]interface{}:
		ok_entry, is_ok := r["ok"]
		if !is_ok {
			panic("No 'ok' value found in the R!")
		}
		ok_map, is_ok := ok_entry.(map[string]interface{})
		if !is_ok {
			panic("The 'ok' value in the R is not an object!")
		}
		// Check for presence *before* using the value, so a missing key gives a useful message
		view_count_val, is_ok := ok_map["viewCount"]
		if !is_ok {
			panic("No 'viewCount' value found in the OK!")
		}
		view_count_str, is_ok := view_count_val.(string)
		if !is_ok {
			panic("The 'viewCount' value in the OK is not a string!")
		}
		view_count = int_or_panic(view_count_str)
	}

	video_parsed_url, err := url.Parse(video_remote_url)
	if err != nil {
		panic(err)
	}

	// Use only the URL path, so that query params (e.g., `?tag=12`) don't end up in the filename
	local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))

	return Video{
		ID:            VideoID(apiVideo.ID),
		Width:         apiVideo.OriginalInfo.Width,
		Height:        apiVideo.OriginalInfo.Height,
		RemoteURL:     video_remote_url,
		LocalFilename: local_filename,

		ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
		ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
		Duration:           apiVideo.VideoInfo.Duration,
		ViewCount:          view_count,

		IsDownloaded:    false,
		IsBlockedByDMCA: false,
		IsGeoblocked:    apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
		IsGif:           apiVideo.Type == "animated_gif",
	}
}
func ParseAPIUrlCard(apiCard APICard) Url {
values := apiCard.BindingValues
ret := Url{}
@ -280,6 +292,10 @@ func ParseAPIUrlCard(apiCard APICard) Url {
return ret
}
// Some filesystems get slow if the number of items in a directory is very large. To handle this,
// we add a 2 letter directory prefix, based on the first 2 letters of the filename:
//
// e.g., `abcdefg.asdf` => `ab/abcdefg.asdf`
func get_prefixed_path(p string) string {
local_prefix_regex := regexp.MustCompile(`^[\w-]{2}`)
local_prefix := local_prefix_regex.FindString(p)
@ -307,6 +323,10 @@ func get_thumbnail_local_path(remote_url string) string {
)
}
// -------------------------------------------------------------------------
// Individual tweets
// -------------------------------------------------------------------------
type APITweet struct {
ID int64 `json:"id_str,string"`
ConversationID int64 `json:"conversation_id_str,string"`
@ -567,6 +587,10 @@ func (t APITweet) String() string {
return string(data)
}
// -------------------------------------------------------------------------
// User information
// -------------------------------------------------------------------------
type APIUser struct {
CreatedAt string `json:"created_at"`
Description string `json:"description"`
@ -643,6 +667,10 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
return
}
// -------------------------------------------------------------------------
// Notifications
// -------------------------------------------------------------------------
type APINotification struct {
ID string `json:"id"`
TimestampMs int64 `json:"timestampMs,string"`
@ -674,6 +702,10 @@ type APINotification struct {
} `json:"template"`
}
// -------------------------------------------------------------------------
// Metadata object for ordering, which contains implicit data for tombstones
// -------------------------------------------------------------------------
type APIv1Entry struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
@ -710,6 +742,10 @@ type APIv1Entry struct {
// entry_sorting_cmp is a comparison function that orders entries by descending SortIndex: it
// returns a negative number if `a` has the larger SortIndex, a positive number if `b` does, and 0
// if they are equal.
//
// The indices are compared directly rather than subtracted, because the int64 difference can
// overflow, and converting it to `int` truncates it on platforms where `int` is 32 bits; either
// can flip the sign of the result.
func entry_sorting_cmp(a, b APIv1Entry) int {
	switch {
	case a.SortIndex > b.SortIndex:
		return -1
	case a.SortIndex < b.SortIndex:
		return 1
	default:
		return 0
	}
}
// -------------------------------------------------------------------------
// Full APIv1 response
// -------------------------------------------------------------------------
type APIv1Response struct {
GlobalObjects struct {
Tweets map[string]APITweet `json:"tweets"`
@ -902,6 +938,10 @@ func (t *APIv1Response) ToTweetTrove() (TweetTrove, error) {
return ret, nil
}
// -------------------------------------------------------------------------
// Utils
// -------------------------------------------------------------------------
// idstr_to_int parses `s` with `int_or_panic` and widens the result to an int64.
func idstr_to_int(s string) int64 {
	parsed := int_or_panic(s)
	return int64(parsed)
}

View File

@ -5,6 +5,7 @@ import (
"net/http"
"os"
"testing"
"time"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
@ -172,6 +173,277 @@ func TestHandleTombstonesUnavailable(t *testing.T) {
}
}
// TestParseAPIMedia checks that the sample image payload is parsed into an Image with the expected
// ID, remote URL, dimensions and local filename.
func TestParseAPIMedia(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/image.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apimedia APIMedia
	err = json.Unmarshal(data, &apimedia)
	require.NoError(t, err)

	image := ParseAPIMedia(apimedia)
	assert.Equal(ImageID(1395882862289772553), image.ID)
	assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
	assert.Equal(593, image.Width)
	assert.Equal(239, image.Height)
	assert.Equal("E1/E18sEUrWYAk8dBl.jpg", image.LocalFilename)
	assert.False(image.IsDownloaded)
}
// TestParsePoll2Choices checks that a two-option poll card is parsed into a Poll with the expected
// ID, timing fields, choice labels and vote counts.
func TestParsePoll2Choices(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	poll := ParseAPIPoll(apiCard)
	assert.Equal(PollID(1457419248461131776), poll.ID)
	assert.Equal(2, poll.NumChoices)
	assert.Equal(60*60*24, poll.VotingDuration)
	assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
	assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())

	// The poll was last updated before voting ended
	assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
	assert.Equal("Yes", poll.Choice1)
	assert.Equal("No", poll.Choice2)
	assert.Equal(529, poll.Choice1_Votes)
	assert.Equal(2182, poll.Choice2_Votes)
}
// TestParsePoll4Choices checks that a four-option poll card, last updated after voting ended, is
// parsed into a Poll with the expected ID, timing fields, choice labels and vote counts.
func TestParsePoll4Choices(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	poll := ParseAPIPoll(apiCard)
	assert.Equal(PollID(1455611588854140929), poll.ID)
	assert.Equal(4, poll.NumChoices)
	assert.Equal(60*60*24, poll.VotingDuration)
	assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
	assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())

	// The poll was last updated after voting ended
	assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
	assert.Equal("Alec Baldwin", poll.Choice1)
	assert.Equal(1669, poll.Choice1_Votes)
	assert.Equal("Andew Cuomo", poll.Choice2)
	assert.Equal(272, poll.Choice2_Votes)
	assert.Equal("George Floyd", poll.Choice3)
	assert.Equal(829, poll.Choice3_Votes)
	assert.Equal("Derek Chauvin", poll.Choice4)
	assert.Equal(2397, poll.Choice4_Votes)
}
// TestPollHelpers exercises the Poll helper methods (TotalVotes, VotePercentage, IsOpen, IsWinner)
// on a poll that is first open and then ended.
func TestPollHelpers(t *testing.T) {
	assert := assert.New(t)
	p := Poll{
		Choice1_Votes: 1,
		Choice2_Votes: 2,
		Choice3_Votes: 3,
		Choice4_Votes: 4,
		VotingEndsAt:  Timestamp{Time: time.Now().Add(10 * time.Second)},
	}

	// `assert.Equal` takes (expected, actual); keeping that order makes failure messages read correctly
	assert.Equal(10, p.TotalVotes())
	assert.Equal(30.0, p.VotePercentage(p.Choice3_Votes))

	// While the poll is still open, no choice counts as the winner
	assert.True(p.IsOpen())
	assert.False(p.IsWinner(p.Choice4_Votes))

	// End the poll
	p.VotingEndsAt = Timestamp{Time: time.Now().Add(-10 * time.Second)}
	assert.False(p.IsOpen())
	assert.False(p.IsWinner(p.Choice2_Votes))
	assert.True(p.IsWinner(p.Choice4_Votes))
}
// TestParseSingleRetweet checks that an API tweet which is a retweet produces a trove containing
// no tweets and exactly one Retweet with the expected IDs and timestamp.
func TestParseSingleRetweet(t *testing.T) {
	assert := assert.New(t)
	require := require.New(t)
	data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(err)
	var api_tweet APITweet
	err = json.Unmarshal(data, &api_tweet)
	require.NoError(err)

	trove, err := api_tweet.ToTweetTrove()
	require.NoError(err)

	require.Empty(trove.Tweets)
	require.Len(trove.Retweets, 1)
	retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
	require.True(is_ok)

	assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
	assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
	assert.Equal(UserID(44067298), retweet.RetweetedByID)
	assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
}
// TestParseAPIUrlCard checks that a URL card with a thumbnail is parsed into a Url with the
// expected domain, title, description, thumbnail details and creator / site IDs.
func TestParseAPIUrlCard(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("reason.com", url.Domain)
	assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
	assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
		"resilience.\"", url.Description)
	assert.Equal(600, url.ThumbnailWidth)
	assert.Equal(315, url.ThumbnailHeight)
	assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
	assert.Equal("od/odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
	assert.Equal(UserID(155581583), url.CreatorID)
	assert.Equal(UserID(16467567), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithPlayer checks that a URL card containing a player is parsed into a Url
// with the expected domain, title, description, thumbnail URL / local path and site ID.
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("www.youtube.com", url.Domain)
	assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
	assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
		"Watch this episode on Rumble: https://rumble...", url.Description)
	assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
	assert.Equal("_1/_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
	assert.Equal(UserID(10228272), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail checks that a player card whose thumbnail is
// the `player-placeholder.png` image is parsed with that image as its thumbnail, and that the
// expected local path for it has no directory prefix.
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("www.youtube.com", url.Domain)
	assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
	assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
		"__________________________________________...", url.Description)
	assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
	assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
	assert.Equal(UserID(10228272), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithoutThumbnail checks that a URL card with no thumbnail is still parsed as
// a card (HasCard) with the expected domain and title, an empty description, and HasThumbnail unset.
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("en.m.wikipedia.org", url.Domain)
	assert.Equal("Entryism - Wikipedia", url.Title)
	assert.Equal("", url.Description)
	assert.True(url.HasCard)
	assert.False(url.HasThumbnail)
}
// Should check if a url is a tweet url, and if so, parse it
func TestParseTweetUrl(t *testing.T) {
	assert := assert.New(t)

	// Each of these should be recognized as a tweet url and yield the given handle and tweet ID
	valid_cases := []struct {
		tweet_url string
		handle    UserHandle
		id        TweetID
	}{
		// Plain tweet url
		{"https://twitter.com/kanesays23/status/1429583672827465730", UserHandle("kanesays23"), TweetID(1429583672827465730)},
		// Url with GET params
		{"https://twitter.com/NerdNoticing/status/1263192389050654720?s=20", UserHandle("NerdNoticing"), TweetID(1263192389050654720)},
		// A `mobile.twitter.com` url
		{"https://mobile.twitter.com/APhilosophae/status/1497720548540964864", UserHandle("APhilosophae"), TweetID(1497720548540964864)},
		// A `x.com` url
		{"https://x.com/brutedeforce/status/1579695139425222657?s=46", UserHandle("brutedeforce"), TweetID(1579695139425222657)},
	}
	for _, c := range valid_cases {
		handle, id, is_ok := TryParseTweetUrl(c.tweet_url)
		assert.True(is_ok)
		assert.Equal(c.handle, handle)
		assert.Equal(c.id, id)
	}

	// An invalid url and the empty string should both be rejected
	invalid_urls := []string{
		"https://twitter.com/NerdNoticing/status/1263192389050654720s=20",
		"",
	}
	for _, bad_url := range invalid_urls {
		_, _, is_ok := TryParseTweetUrl(bad_url)
		assert.False(is_ok)
	}
}
// Should extract a user handle from a tweet URL, or fail if URL is invalid
func TestParseHandleFromTweetUrl(t *testing.T) {
	assert := assert.New(t)

	// Each of these should parse without error and yield the given handle
	valid_cases := []struct {
		tweet_url string
		handle    UserHandle
	}{
		// Plain tweet url
		{"https://twitter.com/kanesays23/status/1429583672827465730", UserHandle("kanesays23")},
		// Url with GET params
		{"https://twitter.com/NerdNoticing/status/1263192389050654720?s=20", UserHandle("NerdNoticing")},
	}
	for _, c := range valid_cases {
		result, err := ParseHandleFromTweetUrl(c.tweet_url)
		assert.NoError(err)
		assert.Equal(c.handle, result)
	}

	// An invalid url and the empty string should both produce an error
	invalid_urls := []string{
		"https://twitter.com/NerdNoticing/status/1263192389050654720s=20",
		"",
	}
	for _, bad_url := range invalid_urls {
		_, err := ParseHandleFromTweetUrl(bad_url)
		assert.Error(err)
	}
}
// Should extract a user handle from a shortened tweet URL
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
assert := assert.New(t)
@ -211,3 +483,40 @@ func TestGetTinyURLs(t *testing.T) {
assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal")
assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg")
}
func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video.json")
require.NoError(err)
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(err)
video := ParseAPIVideo(apivideo)
assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(1280, video.Height)
assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("sm/sm4iL9_f8Lclh0aa.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eU/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded)
}
func TestParseGeoblockedVideo(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
require.NoError(err)
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(err)
video := ParseAPIVideo(apivideo)
assert.True(video.IsGeoblocked)
}

View File

@ -1,31 +0,0 @@
package scraper_test
import (
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// TestParseAPIMedia checks that the sample image payload is parsed into an Image with the expected
// ID, remote URL, dimensions and local filename.
func TestParseAPIMedia(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/image.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apimedia APIMedia
	err = json.Unmarshal(data, &apimedia)
	require.NoError(t, err)

	image := ParseAPIMedia(apimedia)
	assert.Equal(ImageID(1395882862289772553), image.ID)
	assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
	assert.Equal(593, image.Width)
	assert.Equal(239, image.Height)
	assert.Equal("E1/E18sEUrWYAk8dBl.jpg", image.LocalFilename)
	assert.False(image.IsDownloaded)
}

View File

@ -1,90 +0,0 @@
package scraper_test
import (
"encoding/json"
"os"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// TestParsePoll2Choices checks that a two-option poll card is parsed into a Poll with the expected
// ID, timing fields, choice labels and vote counts.
func TestParsePoll2Choices(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	poll := ParseAPIPoll(apiCard)
	assert.Equal(PollID(1457419248461131776), poll.ID)
	assert.Equal(2, poll.NumChoices)
	assert.Equal(60*60*24, poll.VotingDuration)
	assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
	assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())

	// The poll was last updated before voting ended
	assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
	assert.Equal("Yes", poll.Choice1)
	assert.Equal("No", poll.Choice2)
	assert.Equal(529, poll.Choice1_Votes)
	assert.Equal(2182, poll.Choice2_Votes)
}
// TestParsePoll4Choices checks that a four-option poll card, last updated after voting ended, is
// parsed into a Poll with the expected ID, timing fields, choice labels and vote counts.
func TestParsePoll4Choices(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	poll := ParseAPIPoll(apiCard)
	assert.Equal(PollID(1455611588854140929), poll.ID)
	assert.Equal(4, poll.NumChoices)
	assert.Equal(60*60*24, poll.VotingDuration)
	assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
	assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())

	// The poll was last updated after voting ended
	assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
	assert.Equal("Alec Baldwin", poll.Choice1)
	assert.Equal(1669, poll.Choice1_Votes)
	assert.Equal("Andew Cuomo", poll.Choice2)
	assert.Equal(272, poll.Choice2_Votes)
	assert.Equal("George Floyd", poll.Choice3)
	assert.Equal(829, poll.Choice3_Votes)
	assert.Equal("Derek Chauvin", poll.Choice4)
	assert.Equal(2397, poll.Choice4_Votes)
}
// TestPollHelpers exercises the Poll helper methods (TotalVotes, VotePercentage, IsOpen, IsWinner)
// on a poll that is first open and then ended.
func TestPollHelpers(t *testing.T) {
	assert := assert.New(t)
	p := Poll{
		Choice1_Votes: 1,
		Choice2_Votes: 2,
		Choice3_Votes: 3,
		Choice4_Votes: 4,
		VotingEndsAt:  Timestamp{Time: time.Now().Add(10 * time.Second)},
	}

	// `assert.Equal` takes (expected, actual); keeping that order makes failure messages read correctly
	assert.Equal(10, p.TotalVotes())
	assert.Equal(30.0, p.VotePercentage(p.Choice3_Votes))

	// While the poll is still open, no choice counts as the winner
	assert.True(p.IsOpen())
	assert.False(p.IsWinner(p.Choice4_Votes))

	// End the poll
	p.VotingEndsAt = Timestamp{Time: time.Now().Add(-10 * time.Second)}
	assert.False(p.IsOpen())
	assert.False(p.IsWinner(p.Choice2_Votes))
	assert.True(p.IsWinner(p.Choice4_Votes))
}

View File

@ -1,38 +0,0 @@
package scraper_test
import (
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// TestParseSingleRetweet checks that an API tweet which is a retweet produces a trove containing
// no tweets and exactly one Retweet with the expected IDs and timestamp.
func TestParseSingleRetweet(t *testing.T) {
	assert := assert.New(t)
	require := require.New(t)
	data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(err)
	var api_tweet APITweet
	err = json.Unmarshal(data, &api_tweet)
	require.NoError(err)

	trove, err := api_tweet.ToTweetTrove()
	require.NoError(err)

	require.Empty(trove.Tweets)
	require.Len(trove.Retweets, 1)
	retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
	require.True(is_ok)

	assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
	assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
	assert.Equal(UserID(44067298), retweet.RetweetedByID)
	assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
}

File diff suppressed because one or more lines are too long

View File

@ -1,165 +0,0 @@
package scraper_test
import (
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// TestParseAPIUrlCard checks that a URL card with a thumbnail is parsed into a Url with the
// expected domain, title, description, thumbnail details and creator / site IDs.
func TestParseAPIUrlCard(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("reason.com", url.Domain)
	assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
	assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
		"resilience.\"", url.Description)
	assert.Equal(600, url.ThumbnailWidth)
	assert.Equal(315, url.ThumbnailHeight)
	assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
	assert.Equal("od/odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
	assert.Equal(UserID(155581583), url.CreatorID)
	assert.Equal(UserID(16467567), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithPlayer checks that a URL card containing a player is parsed into a Url
// with the expected domain, title, description, thumbnail URL / local path and site ID.
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("www.youtube.com", url.Domain)
	assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
	assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
		"Watch this episode on Rumble: https://rumble...", url.Description)
	assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
	assert.Equal("_1/_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
	assert.Equal(UserID(10228272), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail checks that a player card whose thumbnail is
// the `player-placeholder.png` image is parsed with that image as its thumbnail, and that the
// expected local path for it has no directory prefix.
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("www.youtube.com", url.Domain)
	assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
	assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
		"__________________________________________...", url.Description)
	assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
	assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
	assert.Equal(UserID(10228272), url.SiteID)
	assert.True(url.HasThumbnail)
	assert.False(url.IsContentDownloaded)
}
// TestParseAPIUrlCardWithoutThumbnail checks that a URL card with no thumbnail is still parsed as
// a card (HasCard) with the expected domain and title, an empty description, and HasThumbnail unset.
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
	assert := assert.New(t)
	data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
	// Fail just this test (instead of panicking and aborting the whole test binary) if the fixture can't be read
	require.NoError(t, err)
	var apiCard APICard
	err = json.Unmarshal(data, &apiCard)
	require.NoError(t, err)

	url := ParseAPIUrlCard(apiCard)
	assert.Equal("en.m.wikipedia.org", url.Domain)
	assert.Equal("Entryism - Wikipedia", url.Title)
	assert.Equal("", url.Description)
	assert.True(url.HasCard)
	assert.False(url.HasThumbnail)
}
// Should check if a url is a tweet url, and if so, parse it
func TestParseTweetUrl(t *testing.T) {
	assert := assert.New(t)

	// Each of these should be recognized as a tweet url and yield the given handle and tweet ID
	valid_cases := []struct {
		tweet_url string
		handle    UserHandle
		id        TweetID
	}{
		// Plain tweet url
		{"https://twitter.com/kanesays23/status/1429583672827465730", UserHandle("kanesays23"), TweetID(1429583672827465730)},
		// Url with GET params
		{"https://twitter.com/NerdNoticing/status/1263192389050654720?s=20", UserHandle("NerdNoticing"), TweetID(1263192389050654720)},
		// A `mobile.twitter.com` url
		{"https://mobile.twitter.com/APhilosophae/status/1497720548540964864", UserHandle("APhilosophae"), TweetID(1497720548540964864)},
		// A `x.com` url
		{"https://x.com/brutedeforce/status/1579695139425222657?s=46", UserHandle("brutedeforce"), TweetID(1579695139425222657)},
	}
	for _, c := range valid_cases {
		handle, id, is_ok := TryParseTweetUrl(c.tweet_url)
		assert.True(is_ok)
		assert.Equal(c.handle, handle)
		assert.Equal(c.id, id)
	}

	// An invalid url and the empty string should both be rejected
	invalid_urls := []string{
		"https://twitter.com/NerdNoticing/status/1263192389050654720s=20",
		"",
	}
	for _, bad_url := range invalid_urls {
		_, _, is_ok := TryParseTweetUrl(bad_url)
		assert.False(is_ok)
	}
}
// Should extract a user handle from a tweet URL, or fail if URL is invalid
func TestParseHandleFromTweetUrl(t *testing.T) {
	assert := assert.New(t)

	// Each of these should parse without error and yield the given handle
	valid_cases := []struct {
		tweet_url string
		handle    UserHandle
	}{
		// Plain tweet url
		{"https://twitter.com/kanesays23/status/1429583672827465730", UserHandle("kanesays23")},
		// Url with GET params
		{"https://twitter.com/NerdNoticing/status/1263192389050654720?s=20", UserHandle("NerdNoticing")},
	}
	for _, c := range valid_cases {
		result, err := ParseHandleFromTweetUrl(c.tweet_url)
		assert.NoError(err)
		assert.Equal(c.handle, result)
	}

	// An invalid url and the empty string should both produce an error
	invalid_urls := []string{
		"https://twitter.com/NerdNoticing/status/1263192389050654720s=20",
		"",
	}
	for _, bad_url := range invalid_urls {
		_, err := ParseHandleFromTweetUrl(bad_url)
		assert.Error(err)
	}
}

View File

@ -1,49 +0,0 @@
package scraper_test
import (
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video.json")
require.NoError(err)
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(err)
video := ParseAPIVideo(apivideo)
assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(1280, video.Height)
assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("sm/sm4iL9_f8Lclh0aa.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eU/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded)
}
func TestParseGeoblockedVideo(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
require.NoError(err)
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(err)
video := ParseAPIVideo(apivideo)
assert.True(video.IsGeoblocked)
}