REFACTOR: move a bunch of stuff around in the scraper to consolidate
- delete type-specific test files and put them in api_types_test - remove an unneeded test file
This commit is contained in:
parent
32531a3bd9
commit
bd0e353771
@ -14,6 +14,10 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Image content
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APIMedia struct {
|
type APIMedia struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
MediaURLHttps string `json:"media_url_https"`
|
MediaURLHttps string `json:"media_url_https"`
|
||||||
@ -38,6 +42,10 @@ func ParseAPIMedia(apiMedia APIMedia) Image {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Video content
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type Variant struct {
|
type Variant struct {
|
||||||
Bitrate int `json:"bitrate,omitempty"`
|
Bitrate int `json:"bitrate,omitempty"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
@ -66,6 +74,60 @@ type APIExtendedMedia struct {
|
|||||||
URL string `json:"url"` // For DM videos
|
URL string `json:"url"` // For DM videos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
||||||
|
variants := apiVideo.VideoInfo.Variants
|
||||||
|
slices.SortFunc(variants, func(a, b Variant) int { return b.Bitrate - a.Bitrate })
|
||||||
|
video_remote_url := variants[0].URL
|
||||||
|
|
||||||
|
var view_count int
|
||||||
|
|
||||||
|
r := apiVideo.Ext.MediaStats.R
|
||||||
|
|
||||||
|
switch r.(type) {
|
||||||
|
case string:
|
||||||
|
view_count = 0
|
||||||
|
case map[string]interface{}:
|
||||||
|
OK_entry, ok := r.(map[string]interface{})["ok"]
|
||||||
|
if !ok {
|
||||||
|
panic("No 'ok' value found in the R!")
|
||||||
|
}
|
||||||
|
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
||||||
|
view_count = int_or_panic(view_count_str.(string))
|
||||||
|
if !ok {
|
||||||
|
panic("No 'viewCount' value found in the OK!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
video_parsed_url, err := url.Parse(video_remote_url)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))
|
||||||
|
|
||||||
|
return Video{
|
||||||
|
ID: VideoID(apiVideo.ID),
|
||||||
|
Width: apiVideo.OriginalInfo.Width,
|
||||||
|
Height: apiVideo.OriginalInfo.Height,
|
||||||
|
RemoteURL: video_remote_url,
|
||||||
|
LocalFilename: local_filename,
|
||||||
|
|
||||||
|
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||||
|
ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
|
||||||
|
Duration: apiVideo.VideoInfo.Duration,
|
||||||
|
ViewCount: view_count,
|
||||||
|
|
||||||
|
IsDownloaded: false,
|
||||||
|
IsBlockedByDMCA: false,
|
||||||
|
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
||||||
|
IsGif: apiVideo.Type == "animated_gif",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Cards: polls and urls
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APICard struct {
|
type APICard struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
ShortenedUrl string `json:"url"`
|
ShortenedUrl string `json:"url"`
|
||||||
@ -194,56 +256,6 @@ func parse_num_choices(card_name string) int {
|
|||||||
return int_or_panic(card_name[4:5])
|
return int_or_panic(card_name[4:5])
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
|
||||||
variants := apiVideo.VideoInfo.Variants
|
|
||||||
slices.SortFunc(variants, func(a, b Variant) int { return b.Bitrate - a.Bitrate })
|
|
||||||
video_remote_url := variants[0].URL
|
|
||||||
|
|
||||||
var view_count int
|
|
||||||
|
|
||||||
r := apiVideo.Ext.MediaStats.R
|
|
||||||
|
|
||||||
switch r.(type) {
|
|
||||||
case string:
|
|
||||||
view_count = 0
|
|
||||||
case map[string]interface{}:
|
|
||||||
OK_entry, ok := r.(map[string]interface{})["ok"]
|
|
||||||
if !ok {
|
|
||||||
panic("No 'ok' value found in the R!")
|
|
||||||
}
|
|
||||||
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
|
||||||
view_count = int_or_panic(view_count_str.(string))
|
|
||||||
if !ok {
|
|
||||||
panic("No 'viewCount' value found in the OK!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
video_parsed_url, err := url.Parse(video_remote_url)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))
|
|
||||||
|
|
||||||
return Video{
|
|
||||||
ID: VideoID(apiVideo.ID),
|
|
||||||
Width: apiVideo.OriginalInfo.Width,
|
|
||||||
Height: apiVideo.OriginalInfo.Height,
|
|
||||||
RemoteURL: video_remote_url,
|
|
||||||
LocalFilename: local_filename,
|
|
||||||
|
|
||||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
|
||||||
ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
|
|
||||||
Duration: apiVideo.VideoInfo.Duration,
|
|
||||||
ViewCount: view_count,
|
|
||||||
|
|
||||||
IsDownloaded: false,
|
|
||||||
IsBlockedByDMCA: false,
|
|
||||||
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
|
||||||
IsGif: apiVideo.Type == "animated_gif",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func ParseAPIUrlCard(apiCard APICard) Url {
|
func ParseAPIUrlCard(apiCard APICard) Url {
|
||||||
values := apiCard.BindingValues
|
values := apiCard.BindingValues
|
||||||
ret := Url{}
|
ret := Url{}
|
||||||
@ -280,6 +292,10 @@ func ParseAPIUrlCard(apiCard APICard) Url {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Some filesystems get slow if the number of items in a directory is very large. To handle this,
|
||||||
|
// we add a 2 letter directory prefix, based on the first 2 letters of the filename:
|
||||||
|
//
|
||||||
|
// e.g., `abcdefg.asdf` => `ab/abcdefg.asdf`
|
||||||
func get_prefixed_path(p string) string {
|
func get_prefixed_path(p string) string {
|
||||||
local_prefix_regex := regexp.MustCompile(`^[\w-]{2}`)
|
local_prefix_regex := regexp.MustCompile(`^[\w-]{2}`)
|
||||||
local_prefix := local_prefix_regex.FindString(p)
|
local_prefix := local_prefix_regex.FindString(p)
|
||||||
@ -307,6 +323,10 @@ func get_thumbnail_local_path(remote_url string) string {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Individual tweets
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APITweet struct {
|
type APITweet struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
ConversationID int64 `json:"conversation_id_str,string"`
|
ConversationID int64 `json:"conversation_id_str,string"`
|
||||||
@ -567,6 +587,10 @@ func (t APITweet) String() string {
|
|||||||
return string(data)
|
return string(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// User information
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APIUser struct {
|
type APIUser struct {
|
||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
@ -643,6 +667,10 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Notifications
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APINotification struct {
|
type APINotification struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
TimestampMs int64 `json:"timestampMs,string"`
|
TimestampMs int64 `json:"timestampMs,string"`
|
||||||
@ -674,6 +702,10 @@ type APINotification struct {
|
|||||||
} `json:"template"`
|
} `json:"template"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Metadata object for ordering, which contains implicit data for tombstones
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APIv1Entry struct {
|
type APIv1Entry struct {
|
||||||
EntryID string `json:"entryId"`
|
EntryID string `json:"entryId"`
|
||||||
SortIndex int64 `json:"sortIndex,string"`
|
SortIndex int64 `json:"sortIndex,string"`
|
||||||
@ -710,6 +742,10 @@ type APIv1Entry struct {
|
|||||||
|
|
||||||
func entry_sorting_cmp(a, b APIv1Entry) int { return int(b.SortIndex - a.SortIndex) }
|
// entry_sorting_cmp is a comparison function that orders entries by SortIndex, largest first.
// It returns a positive number when `b` has the larger SortIndex, a negative number when `a`
// does, and 0 when they are equal.
//
// The indices are compared directly rather than subtracted: an int64 difference can overflow,
// and converting it to `int` can truncate on 32-bit platforms, either of which may flip the sign.
func entry_sorting_cmp(a, b APIv1Entry) int {
	switch {
	case b.SortIndex > a.SortIndex:
		return 1
	case b.SortIndex < a.SortIndex:
		return -1
	default:
		return 0
	}
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Full APIv1 response
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
type APIv1Response struct {
|
type APIv1Response struct {
|
||||||
GlobalObjects struct {
|
GlobalObjects struct {
|
||||||
Tweets map[string]APITweet `json:"tweets"`
|
Tweets map[string]APITweet `json:"tweets"`
|
||||||
@ -902,6 +938,10 @@ func (t *APIv1Response) ToTweetTrove() (TweetTrove, error) {
|
|||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Utils
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
func idstr_to_int(s string) int64 {
|
// idstr_to_int converts an ID given as a string into an int64, delegating the parsing to int_or_panic.
// NOTE(review): presumably panics on non-numeric input, as the helper's name suggests — confirm.
func idstr_to_int(s string) int64 {
|
||||||
return int64(int_or_panic(s))
|
return int64(int_or_panic(s))
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/jarcoal/httpmock"
|
"github.com/jarcoal/httpmock"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@ -172,6 +173,277 @@ func TestHandleTombstonesUnavailable(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseAPIMedia(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apimedia APIMedia
|
||||||
|
err = json.Unmarshal(data, &apimedia)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
image := ParseAPIMedia(apimedia)
|
||||||
|
assert.Equal(ImageID(1395882862289772553), image.ID)
|
||||||
|
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
||||||
|
assert.Equal(593, image.Width)
|
||||||
|
assert.Equal(239, image.Height)
|
||||||
|
assert.Equal("E1/E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
||||||
|
assert.False(image.IsDownloaded)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParsePoll2Choices(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
poll := ParseAPIPoll(apiCard)
|
||||||
|
assert.Equal(PollID(1457419248461131776), poll.ID)
|
||||||
|
assert.Equal(2, poll.NumChoices)
|
||||||
|
assert.Equal(60*60*24, poll.VotingDuration)
|
||||||
|
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
||||||
|
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
||||||
|
|
||||||
|
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||||
|
assert.Equal("Yes", poll.Choice1)
|
||||||
|
assert.Equal("No", poll.Choice2)
|
||||||
|
assert.Equal(529, poll.Choice1_Votes)
|
||||||
|
assert.Equal(2182, poll.Choice2_Votes)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParsePoll4Choices(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
poll := ParseAPIPoll(apiCard)
|
||||||
|
assert.Equal(PollID(1455611588854140929), poll.ID)
|
||||||
|
assert.Equal(4, poll.NumChoices)
|
||||||
|
assert.Equal(60*60*24, poll.VotingDuration)
|
||||||
|
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
||||||
|
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
||||||
|
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||||
|
|
||||||
|
assert.Equal("Alec Baldwin", poll.Choice1)
|
||||||
|
assert.Equal(1669, poll.Choice1_Votes)
|
||||||
|
|
||||||
|
assert.Equal("Andew Cuomo", poll.Choice2)
|
||||||
|
assert.Equal(272, poll.Choice2_Votes)
|
||||||
|
|
||||||
|
assert.Equal("George Floyd", poll.Choice3)
|
||||||
|
assert.Equal(829, poll.Choice3_Votes)
|
||||||
|
|
||||||
|
assert.Equal("Derek Chauvin", poll.Choice4)
|
||||||
|
assert.Equal(2397, poll.Choice4_Votes)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPollHelpers(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
p := Poll{
|
||||||
|
Choice1_Votes: 1,
|
||||||
|
Choice2_Votes: 2,
|
||||||
|
Choice3_Votes: 3,
|
||||||
|
Choice4_Votes: 4,
|
||||||
|
VotingEndsAt: Timestamp{Time: time.Now().Add(10 * time.Second)},
|
||||||
|
}
|
||||||
|
assert.Equal(p.TotalVotes(), 10)
|
||||||
|
assert.Equal(p.VotePercentage(p.Choice3_Votes), 30.0)
|
||||||
|
|
||||||
|
assert.True(p.IsOpen())
|
||||||
|
assert.False(p.IsWinner(p.Choice4_Votes))
|
||||||
|
|
||||||
|
// End the poll
|
||||||
|
p.VotingEndsAt = Timestamp{Time: time.Now().Add(-10 * time.Second)}
|
||||||
|
assert.False(p.IsOpen())
|
||||||
|
assert.False(p.IsWinner(p.Choice2_Votes))
|
||||||
|
assert.True(p.IsWinner(p.Choice4_Votes))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSingleRetweet(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var api_tweet APITweet
|
||||||
|
err = json.Unmarshal(data, &api_tweet)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
trove, err := api_tweet.ToTweetTrove()
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
require.Len(trove.Tweets, 0)
|
||||||
|
require.Len(trove.Retweets, 1)
|
||||||
|
|
||||||
|
retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
|
||||||
|
require.True(is_ok)
|
||||||
|
|
||||||
|
assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
|
||||||
|
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
|
||||||
|
assert.Equal(UserID(44067298), retweet.RetweetedByID)
|
||||||
|
assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIUrlCard(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
url := ParseAPIUrlCard(apiCard)
|
||||||
|
assert.Equal("reason.com", url.Domain)
|
||||||
|
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
||||||
|
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
|
||||||
|
"resilience.\"", url.Description)
|
||||||
|
assert.Equal(600, url.ThumbnailWidth)
|
||||||
|
assert.Equal(315, url.ThumbnailHeight)
|
||||||
|
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||||
|
assert.Equal("od/odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
||||||
|
assert.Equal(UserID(155581583), url.CreatorID)
|
||||||
|
assert.Equal(UserID(16467567), url.SiteID)
|
||||||
|
assert.True(url.HasThumbnail)
|
||||||
|
assert.False(url.IsContentDownloaded)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
url := ParseAPIUrlCard(apiCard)
|
||||||
|
assert.Equal("www.youtube.com", url.Domain)
|
||||||
|
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
||||||
|
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
|
||||||
|
"Watch this episode on Rumble: https://rumble...", url.Description)
|
||||||
|
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
||||||
|
assert.Equal("_1/_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
||||||
|
assert.Equal(UserID(10228272), url.SiteID)
|
||||||
|
assert.True(url.HasThumbnail)
|
||||||
|
assert.False(url.IsContentDownloaded)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
url := ParseAPIUrlCard(apiCard)
|
||||||
|
assert.Equal("www.youtube.com", url.Domain)
|
||||||
|
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
||||||
|
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
|
||||||
|
"__________________________________________...", url.Description)
|
||||||
|
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
||||||
|
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
||||||
|
assert.Equal(UserID(10228272), url.SiteID)
|
||||||
|
assert.True(url.HasThumbnail)
|
||||||
|
assert.False(url.IsContentDownloaded)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var apiCard APICard
|
||||||
|
err = json.Unmarshal(data, &apiCard)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
url := ParseAPIUrlCard(apiCard)
|
||||||
|
assert.Equal("en.m.wikipedia.org", url.Domain)
|
||||||
|
assert.Equal("Entryism - Wikipedia", url.Title)
|
||||||
|
assert.Equal("", url.Description)
|
||||||
|
assert.True(url.HasCard)
|
||||||
|
assert.False(url.HasThumbnail)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should check if a url is a tweet url, and if so, parse it
|
||||||
|
func TestParseTweetUrl(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
// Test valid tweet url
|
||||||
|
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||||
|
handle, id, is_ok := TryParseTweetUrl(url)
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(UserHandle("kanesays23"), handle)
|
||||||
|
assert.Equal(TweetID(1429583672827465730), id)
|
||||||
|
|
||||||
|
// Test url with GET params
|
||||||
|
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(UserHandle("NerdNoticing"), handle)
|
||||||
|
assert.Equal(TweetID(1263192389050654720), id)
|
||||||
|
|
||||||
|
// Test a `mobile.twitter.com` url
|
||||||
|
handle, id, is_ok = TryParseTweetUrl("https://mobile.twitter.com/APhilosophae/status/1497720548540964864")
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(UserHandle("APhilosophae"), handle)
|
||||||
|
assert.Equal(TweetID(1497720548540964864), id)
|
||||||
|
|
||||||
|
// Test a `x.com` url
|
||||||
|
handle, id, is_ok = TryParseTweetUrl("https://x.com/brutedeforce/status/1579695139425222657?s=46")
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(UserHandle("brutedeforce"), handle)
|
||||||
|
assert.Equal(TweetID(1579695139425222657), id)
|
||||||
|
|
||||||
|
// Test invalid url
|
||||||
|
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||||
|
assert.False(is_ok)
|
||||||
|
|
||||||
|
// Test empty string
|
||||||
|
_, _, is_ok = TryParseTweetUrl("")
|
||||||
|
assert.False(is_ok)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Should extract a user handle from a tweet URL, or fail if URL is invalid
|
||||||
|
func TestParseHandleFromTweetUrl(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
// Test valid tweet url
|
||||||
|
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||||
|
result, err := ParseHandleFromTweetUrl(url)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(UserHandle("kanesays23"), result)
|
||||||
|
|
||||||
|
// Test url with GET params
|
||||||
|
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(UserHandle("NerdNoticing"), result)
|
||||||
|
|
||||||
|
// Test invalid url
|
||||||
|
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||||
|
assert.Error(err)
|
||||||
|
|
||||||
|
// Test empty string
|
||||||
|
_, err = ParseHandleFromTweetUrl("")
|
||||||
|
assert.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
// Should extract a user handle from a shortened tweet URL
|
// Should extract a user handle from a shortened tweet URL
|
||||||
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
|
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
@ -211,3 +483,40 @@ func TestGetTinyURLs(t *testing.T) {
|
|||||||
assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal")
|
assert.Equal(u.GetTinyProfileImageUrl(), "https://pbs.twimg.com/profile_images/1208124284/iwRReicO_normal")
|
||||||
assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg")
|
assert.Equal(u.GetTinyProfileImageLocalPath(), "testUser_profile_iwRReicO_normal.jpg")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseAPIVideo(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
var apivideo APIExtendedMedia
|
||||||
|
err = json.Unmarshal(data, &apivideo)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
video := ParseAPIVideo(apivideo)
|
||||||
|
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||||
|
assert.Equal(1280, video.Height)
|
||||||
|
assert.Equal(720, video.Width)
|
||||||
|
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||||
|
assert.Equal("sm/sm4iL9_f8Lclh0aa.mp4", video.LocalFilename)
|
||||||
|
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
||||||
|
assert.Equal("eU/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
||||||
|
assert.Equal(275952, video.ViewCount)
|
||||||
|
assert.Equal(88300, video.Duration)
|
||||||
|
assert.False(video.IsDownloaded)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseGeoblockedVideo(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
var apivideo APIExtendedMedia
|
||||||
|
err = json.Unmarshal(data, &apivideo)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
video := ParseAPIVideo(apivideo)
|
||||||
|
assert.True(video.IsGeoblocked)
|
||||||
|
}
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
package scraper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseAPIMedia(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apimedia APIMedia
|
|
||||||
err = json.Unmarshal(data, &apimedia)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
image := ParseAPIMedia(apimedia)
|
|
||||||
assert.Equal(ImageID(1395882862289772553), image.ID)
|
|
||||||
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
|
||||||
assert.Equal(593, image.Width)
|
|
||||||
assert.Equal(239, image.Height)
|
|
||||||
assert.Equal("E1/E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
|
||||||
assert.False(image.IsDownloaded)
|
|
||||||
}
|
|
@ -1,90 +0,0 @@
|
|||||||
package scraper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParsePoll2Choices(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
poll := ParseAPIPoll(apiCard)
|
|
||||||
assert.Equal(PollID(1457419248461131776), poll.ID)
|
|
||||||
assert.Equal(2, poll.NumChoices)
|
|
||||||
assert.Equal(60*60*24, poll.VotingDuration)
|
|
||||||
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
|
||||||
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
|
||||||
|
|
||||||
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
|
||||||
assert.Equal("Yes", poll.Choice1)
|
|
||||||
assert.Equal("No", poll.Choice2)
|
|
||||||
assert.Equal(529, poll.Choice1_Votes)
|
|
||||||
assert.Equal(2182, poll.Choice2_Votes)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParsePoll4Choices(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
poll := ParseAPIPoll(apiCard)
|
|
||||||
assert.Equal(PollID(1455611588854140929), poll.ID)
|
|
||||||
assert.Equal(4, poll.NumChoices)
|
|
||||||
assert.Equal(60*60*24, poll.VotingDuration)
|
|
||||||
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
|
||||||
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
|
||||||
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
|
||||||
|
|
||||||
assert.Equal("Alec Baldwin", poll.Choice1)
|
|
||||||
assert.Equal(1669, poll.Choice1_Votes)
|
|
||||||
|
|
||||||
assert.Equal("Andew Cuomo", poll.Choice2)
|
|
||||||
assert.Equal(272, poll.Choice2_Votes)
|
|
||||||
|
|
||||||
assert.Equal("George Floyd", poll.Choice3)
|
|
||||||
assert.Equal(829, poll.Choice3_Votes)
|
|
||||||
|
|
||||||
assert.Equal("Derek Chauvin", poll.Choice4)
|
|
||||||
assert.Equal(2397, poll.Choice4_Votes)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestPollHelpers(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
p := Poll{
|
|
||||||
Choice1_Votes: 1,
|
|
||||||
Choice2_Votes: 2,
|
|
||||||
Choice3_Votes: 3,
|
|
||||||
Choice4_Votes: 4,
|
|
||||||
VotingEndsAt: Timestamp{Time: time.Now().Add(10 * time.Second)},
|
|
||||||
}
|
|
||||||
assert.Equal(p.TotalVotes(), 10)
|
|
||||||
assert.Equal(p.VotePercentage(p.Choice3_Votes), 30.0)
|
|
||||||
|
|
||||||
assert.True(p.IsOpen())
|
|
||||||
assert.False(p.IsWinner(p.Choice4_Votes))
|
|
||||||
|
|
||||||
// End the poll
|
|
||||||
p.VotingEndsAt = Timestamp{Time: time.Now().Add(-10 * time.Second)}
|
|
||||||
assert.False(p.IsOpen())
|
|
||||||
assert.False(p.IsWinner(p.Choice2_Votes))
|
|
||||||
assert.True(p.IsWinner(p.Choice4_Votes))
|
|
||||||
}
|
|
@ -1,38 +0,0 @@
|
|||||||
package scraper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseSingleRetweet(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
require := require.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var api_tweet APITweet
|
|
||||||
err = json.Unmarshal(data, &api_tweet)
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
trove, err := api_tweet.ToTweetTrove()
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
require.Len(trove.Tweets, 0)
|
|
||||||
require.Len(trove.Retweets, 1)
|
|
||||||
|
|
||||||
retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
|
|
||||||
require.True(is_ok)
|
|
||||||
|
|
||||||
assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
|
|
||||||
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
|
|
||||||
assert.Equal(UserID(44067298), retweet.RetweetedByID)
|
|
||||||
assert.Equal(int64(1623639042), retweet.RetweetedAt.Unix())
|
|
||||||
}
|
|
File diff suppressed because one or more lines are too long
@ -1,165 +0,0 @@
|
|||||||
package scraper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseAPIUrlCard(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
|
||||||
assert.Equal("reason.com", url.Domain)
|
|
||||||
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
|
||||||
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
|
|
||||||
"resilience.\"", url.Description)
|
|
||||||
assert.Equal(600, url.ThumbnailWidth)
|
|
||||||
assert.Equal(315, url.ThumbnailHeight)
|
|
||||||
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
|
||||||
assert.Equal("od/odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
|
||||||
assert.Equal(UserID(155581583), url.CreatorID)
|
|
||||||
assert.Equal(UserID(16467567), url.SiteID)
|
|
||||||
assert.True(url.HasThumbnail)
|
|
||||||
assert.False(url.IsContentDownloaded)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
|
||||||
assert.Equal("www.youtube.com", url.Domain)
|
|
||||||
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
|
||||||
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
|
|
||||||
"Watch this episode on Rumble: https://rumble...", url.Description)
|
|
||||||
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
|
||||||
assert.Equal("_1/_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
|
||||||
assert.Equal(UserID(10228272), url.SiteID)
|
|
||||||
assert.True(url.HasThumbnail)
|
|
||||||
assert.False(url.IsContentDownloaded)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
|
||||||
assert.Equal("www.youtube.com", url.Domain)
|
|
||||||
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
|
||||||
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
|
|
||||||
"__________________________________________...", url.Description)
|
|
||||||
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
|
||||||
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
|
||||||
assert.Equal(UserID(10228272), url.SiteID)
|
|
||||||
assert.True(url.HasThumbnail)
|
|
||||||
assert.False(url.IsContentDownloaded)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
|
||||||
assert.Equal("en.m.wikipedia.org", url.Domain)
|
|
||||||
assert.Equal("Entryism - Wikipedia", url.Title)
|
|
||||||
assert.Equal("", url.Description)
|
|
||||||
assert.True(url.HasCard)
|
|
||||||
assert.False(url.HasThumbnail)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Should check if a url is a tweet url, and if so, parse it
|
|
||||||
*/
|
|
||||||
func TestParseTweetUrl(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
// Test valid tweet url
|
|
||||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
|
||||||
handle, id, is_ok := TryParseTweetUrl(url)
|
|
||||||
assert.True(is_ok)
|
|
||||||
assert.Equal(UserHandle("kanesays23"), handle)
|
|
||||||
assert.Equal(TweetID(1429583672827465730), id)
|
|
||||||
|
|
||||||
// Test url with GET params
|
|
||||||
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
|
||||||
assert.True(is_ok)
|
|
||||||
assert.Equal(UserHandle("NerdNoticing"), handle)
|
|
||||||
assert.Equal(TweetID(1263192389050654720), id)
|
|
||||||
|
|
||||||
// Test a `mobile.twitter.com` url
|
|
||||||
handle, id, is_ok = TryParseTweetUrl("https://mobile.twitter.com/APhilosophae/status/1497720548540964864")
|
|
||||||
assert.True(is_ok)
|
|
||||||
assert.Equal(UserHandle("APhilosophae"), handle)
|
|
||||||
assert.Equal(TweetID(1497720548540964864), id)
|
|
||||||
|
|
||||||
// Test a `x.com` url
|
|
||||||
handle, id, is_ok = TryParseTweetUrl("https://x.com/brutedeforce/status/1579695139425222657?s=46")
|
|
||||||
assert.True(is_ok)
|
|
||||||
assert.Equal(UserHandle("brutedeforce"), handle)
|
|
||||||
assert.Equal(TweetID(1579695139425222657), id)
|
|
||||||
|
|
||||||
// Test invalid url
|
|
||||||
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
|
||||||
assert.False(is_ok)
|
|
||||||
|
|
||||||
// Test empty string
|
|
||||||
_, _, is_ok = TryParseTweetUrl("")
|
|
||||||
assert.False(is_ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
|
||||||
*/
|
|
||||||
func TestParseHandleFromTweetUrl(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
|
|
||||||
// Test valid tweet url
|
|
||||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
|
||||||
result, err := ParseHandleFromTweetUrl(url)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.Equal(UserHandle("kanesays23"), result)
|
|
||||||
|
|
||||||
// Test url with GET params
|
|
||||||
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.Equal(UserHandle("NerdNoticing"), result)
|
|
||||||
|
|
||||||
// Test invalid url
|
|
||||||
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
|
||||||
assert.Error(err)
|
|
||||||
|
|
||||||
// Test empty string
|
|
||||||
_, err = ParseHandleFromTweetUrl("")
|
|
||||||
assert.Error(err)
|
|
||||||
}
|
|
@ -1,49 +0,0 @@
|
|||||||
package scraper_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseAPIVideo(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
require := require.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
var apivideo APIExtendedMedia
|
|
||||||
err = json.Unmarshal(data, &apivideo)
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
video := ParseAPIVideo(apivideo)
|
|
||||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
|
||||||
assert.Equal(1280, video.Height)
|
|
||||||
assert.Equal(720, video.Width)
|
|
||||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
|
||||||
assert.Equal("sm/sm4iL9_f8Lclh0aa.mp4", video.LocalFilename)
|
|
||||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
|
||||||
assert.Equal("eU/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
|
||||||
assert.Equal(275952, video.ViewCount)
|
|
||||||
assert.Equal(88300, video.Duration)
|
|
||||||
assert.False(video.IsDownloaded)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseGeoblockedVideo(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
require := require.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/video_geoblocked.json")
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
var apivideo APIExtendedMedia
|
|
||||||
err = json.Unmarshal(data, &apivideo)
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
video := ParseAPIVideo(apivideo)
|
|
||||||
assert.True(video.IsGeoblocked)
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user