Add 'gofmt' linter
This commit is contained in:
parent
223734d001
commit
d1d80a91cd
@ -27,6 +27,7 @@ linters:
|
||||
- wrapcheck
|
||||
- lll
|
||||
- godox
|
||||
- gofmt
|
||||
- errorlint
|
||||
- nolintlint
|
||||
|
||||
@ -203,9 +204,9 @@ linters-settings:
|
||||
keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting
|
||||
- XXX
|
||||
|
||||
# gofmt:
|
||||
# # simplify code: gofmt with `-s` option, true by default
|
||||
# simplify: true
|
||||
gofmt:
|
||||
# simplify code: gofmt with `-s` option, true by default
|
||||
simplify: true
|
||||
|
||||
# gofumpt:
|
||||
# # Select the Go version to target. The default is `1.15`.
|
||||
|
@ -6,9 +6,9 @@ import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"github.com/go-test/deep"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/go-test/deep"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
@ -76,7 +76,7 @@ func TestModifyUser(t *testing.T) {
|
||||
fake_user.FollowersCount = 2000
|
||||
fake_user.JoinDate = scraper.TimestampFromUnix(2000)
|
||||
fake_user.ProfileImageUrl = "asdf2"
|
||||
fake_user.IsContentDownloaded = false // test No Worsening
|
||||
fake_user.IsContentDownloaded = false // test No Worsening
|
||||
|
||||
// Save the modified user
|
||||
err = profile.SaveUser(&fake_user)
|
||||
|
@ -3,9 +3,9 @@ package persistence
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"os"
|
||||
)
|
||||
|
||||
var NotInDatabase = errors.New("Not in database")
|
||||
@ -35,7 +35,7 @@ func file_exists(path string) bool {
|
||||
* https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250
|
||||
*/
|
||||
// ToSnakeCase converts a CamelCase identifier to snake_case.
//
// It works in two passes: the first inserts an underscore before every
// capitalized word (an uppercase letter followed by lowercase letters), the
// second inserts one between a lowercase letter or digit and a following
// uppercase letter (so "UserID" becomes "User_ID").  The result is lowercased.
// Underscores that are already present are kept and not doubled.
func ToSnakeCase(str string) string {
	snake := snake_word_boundary_re.ReplaceAllString(str, "${1}_${2}")
	snake = snake_upper_boundary_re.ReplaceAllString(snake, "${1}_${2}")
	return strings.ToLower(snake)
}

// Patterns used by ToSnakeCase.  They are compiled once at package
// initialization instead of on every call.
var (
	// Any character, an optional underscore, then a capitalized word.
	snake_word_boundary_re = regexp.MustCompile("(.)_?([A-Z][a-z]+)")
	// A lowercase letter or digit, an optional underscore, then an uppercase letter.
	snake_upper_boundary_re = regexp.MustCompile("([a-z0-9])_?([A-Z])")
)
|
||||
|
@ -70,13 +70,13 @@ func create_stable_user() scraper.User {
|
||||
func create_image_from_id(id int) scraper.Image {
|
||||
filename := fmt.Sprintf("image%d.jpg", id)
|
||||
return scraper.Image{
|
||||
ID: scraper.ImageID(id),
|
||||
TweetID: -1,
|
||||
Width: id * 10,
|
||||
Height: id * 5,
|
||||
RemoteURL: filename,
|
||||
ID: scraper.ImageID(id),
|
||||
TweetID: -1,
|
||||
Width: id * 10,
|
||||
Height: id * 5,
|
||||
RemoteURL: filename,
|
||||
LocalFilename: filename,
|
||||
IsDownloaded: false,
|
||||
IsDownloaded: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,18 +86,18 @@ func create_image_from_id(id int) scraper.Image {
|
||||
func create_video_from_id(id int) scraper.Video {
|
||||
filename := fmt.Sprintf("video%d.jpg", id)
|
||||
return scraper.Video{
|
||||
ID: scraper.VideoID(id),
|
||||
TweetID: -1,
|
||||
Width: id * 10,
|
||||
Height: id * 5,
|
||||
RemoteURL: filename,
|
||||
LocalFilename: filename,
|
||||
ID: scraper.VideoID(id),
|
||||
TweetID: -1,
|
||||
Width: id * 10,
|
||||
Height: id * 5,
|
||||
RemoteURL: filename,
|
||||
LocalFilename: filename,
|
||||
ThumbnailRemoteUrl: filename,
|
||||
ThumbnailLocalPath: filename,
|
||||
Duration: 10000,
|
||||
ViewCount: 200,
|
||||
IsDownloaded: false,
|
||||
IsGif: false,
|
||||
Duration: 10000,
|
||||
ViewCount: 200,
|
||||
IsDownloaded: false,
|
||||
IsGif: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -265,13 +265,13 @@ func create_dummy_tombstone() scraper.Tweet {
|
||||
tweet_id := scraper.TweetID(rand.Int())
|
||||
|
||||
return scraper.Tweet{
|
||||
ID: tweet_id,
|
||||
UserID: -1,
|
||||
ID: tweet_id,
|
||||
UserID: -1,
|
||||
TombstoneType: "deleted",
|
||||
IsStub: true,
|
||||
Mentions: []scraper.UserHandle{},
|
||||
IsStub: true,
|
||||
Mentions: []scraper.UserHandle{},
|
||||
ReplyMentions: []scraper.UserHandle{},
|
||||
Hashtags: []string{},
|
||||
Hashtags: []string{},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,29 +7,27 @@ import (
|
||||
"offline_twitter/terminal_utils"
|
||||
)
|
||||
|
||||
|
||||
const ENGINE_DATABASE_VERSION = 11
|
||||
|
||||
|
||||
type VersionMismatchError struct {
|
||||
EngineVersion int
|
||||
EngineVersion int
|
||||
DatabaseVersion int
|
||||
}
|
||||
|
||||
func (e VersionMismatchError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
|
||||
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
|
||||
Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`,
|
||||
e.DatabaseVersion, e.EngineVersion,
|
||||
e.DatabaseVersion, e.EngineVersion,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The Nth entry is the migration that moves you from version N to version N+1.
|
||||
* `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`.
|
||||
*/
|
||||
var MIGRATIONS = []string{
|
||||
`create table polls (rowid integer primary key,
|
||||
`create table polls (rowid integer primary key,
|
||||
id integer unique not null check(typeof(id) = 'integer'),
|
||||
tweet_id integer not null,
|
||||
num_choices integer not null,
|
||||
@ -50,25 +48,25 @@ var MIGRATIONS = []string{
|
||||
|
||||
foreign key(tweet_id) references tweets(id)
|
||||
);`,
|
||||
`alter table tweets add column is_conversation_scraped boolean default 0;
|
||||
`alter table tweets add column is_conversation_scraped boolean default 0;
|
||||
alter table tweets add column last_scraped_at integer not null default 0`,
|
||||
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
||||
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
||||
insert into tombstone_types (rowid, short_name, tombstone_text)
|
||||
values (5, 'violated', 'This Tweet violated the Twitter Rules'),
|
||||
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
||||
`alter table videos add column thumbnail_remote_url text not null default "missing";
|
||||
`alter table videos add column thumbnail_remote_url text not null default "missing";
|
||||
alter table videos add column thumbnail_local_filename text not null default "missing"`,
|
||||
`alter table videos add column duration integer not null default 0;
|
||||
`alter table videos add column duration integer not null default 0;
|
||||
alter table videos add column view_count integer not null default 0`,
|
||||
`alter table users add column is_banned boolean default 0`,
|
||||
`alter table urls add column short_text text not null default ""`,
|
||||
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|
||||
`alter table users add column is_banned boolean default 0`,
|
||||
`alter table urls add column short_text text not null default ""`,
|
||||
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|
||||
|| 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
||||
`alter table users add column is_followed boolean default 0`,
|
||||
`create table fake_user_sequence(latest_fake_id integer not null);
|
||||
`alter table users add column is_followed boolean default 0`,
|
||||
`create table fake_user_sequence(latest_fake_id integer not null);
|
||||
insert into fake_user_sequence values(0x4000000000000000);
|
||||
alter table users add column is_id_fake boolean default 0;`,
|
||||
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
|
||||
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
|
||||
'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`,
|
||||
}
|
||||
|
||||
|
@ -2,12 +2,13 @@ package persistence_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"os"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
"offline_twitter/persistence"
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestVersionUpgrade(t *testing.T) {
|
||||
@ -25,7 +26,7 @@ func TestVersionUpgrade(t *testing.T) {
|
||||
require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
|
||||
|
||||
persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration)
|
||||
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION + 1)
|
||||
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION+1)
|
||||
require.NoError(err)
|
||||
|
||||
require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't")
|
||||
|
@ -5,8 +5,8 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
END_OF_FEED = fmt.Errorf("End of feed")
|
||||
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
|
||||
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
|
||||
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
|
||||
END_OF_FEED = fmt.Errorf("End of feed")
|
||||
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
|
||||
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
|
||||
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
|
||||
)
|
||||
|
@ -1,33 +1,33 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html"
|
||||
"time"
|
||||
"strings"
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
||||
type APIMedia struct {
|
||||
ID int64 `json:"id_str,string"`
|
||||
MediaURLHttps string `json:"media_url_https"`
|
||||
Type string `json:"type"`
|
||||
URL string `json:"url"`
|
||||
OriginalInfo struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
OriginalInfo struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
} `json:"original_info"`
|
||||
}
|
||||
|
||||
type SortableVariants []struct {
|
||||
Bitrate int `json:"bitrate,omitempty"`
|
||||
URL string `json:"url"`
|
||||
Bitrate int `json:"bitrate,omitempty"`
|
||||
URL string `json:"url"`
|
||||
}
|
||||
func (v SortableVariants) Len() int { return len(v) }
|
||||
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
||||
|
||||
func (v SortableVariants) Len() int { return len(v) }
|
||||
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
||||
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
|
||||
|
||||
type APIExtendedMedia struct {
|
||||
@ -35,12 +35,12 @@ type APIExtendedMedia struct {
|
||||
MediaURLHttps string `json:"media_url_https"`
|
||||
Type string `json:"type"`
|
||||
VideoInfo struct {
|
||||
Variants SortableVariants `json:"variants"`
|
||||
Duration int `json:"duration_millis"`
|
||||
Variants SortableVariants `json:"variants"`
|
||||
Duration int `json:"duration_millis"`
|
||||
} `json:"video_info"`
|
||||
OriginalInfo struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
} `json:"original_info"`
|
||||
Ext struct {
|
||||
MediaStats struct {
|
||||
@ -74,9 +74,9 @@ type APICard struct {
|
||||
} `json:"description"`
|
||||
Thumbnail struct {
|
||||
ImageValue struct {
|
||||
Url string `json:"url"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
Url string `json:"url"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
} `json:"image_value"`
|
||||
} `json:"thumbnail_image_large"`
|
||||
PlayerImage struct {
|
||||
@ -128,18 +128,18 @@ type APICard struct {
|
||||
}
|
||||
|
||||
type APITweet struct {
|
||||
ID int64 `json:"id_str,string"`
|
||||
ConversationID int64 `json:"conversation_id_str,string"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
FavoriteCount int `json:"favorite_count"`
|
||||
FullText string `json:"full_text"`
|
||||
DisplayTextRange []int `json:"display_text_range"`
|
||||
Entities struct {
|
||||
ID int64 `json:"id_str,string"`
|
||||
ConversationID int64 `json:"conversation_id_str,string"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
FavoriteCount int `json:"favorite_count"`
|
||||
FullText string `json:"full_text"`
|
||||
DisplayTextRange []int `json:"display_text_range"`
|
||||
Entities struct {
|
||||
Hashtags []struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"hashtags"`
|
||||
Media []APIMedia `json:"media"`
|
||||
URLs []struct {
|
||||
URLs []struct {
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
ShortenedUrl string `json:"url"`
|
||||
} `json:"urls"`
|
||||
@ -147,30 +147,30 @@ type APITweet struct {
|
||||
UserName string `json:"screen_name"`
|
||||
UserID int64 `json:"id_str,string"`
|
||||
} `json:"user_mentions"`
|
||||
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
|
||||
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
|
||||
} `json:"entities"`
|
||||
ExtendedEntities struct {
|
||||
Media []APIExtendedMedia `json:"media"`
|
||||
} `json:"extended_entities"`
|
||||
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
||||
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
|
||||
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
||||
ReplyCount int `json:"reply_count"`
|
||||
RetweetCount int `json:"retweet_count"`
|
||||
QuoteCount int `json:"quote_count"`
|
||||
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
|
||||
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
||||
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
|
||||
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
||||
ReplyCount int `json:"reply_count"`
|
||||
RetweetCount int `json:"retweet_count"`
|
||||
QuoteCount int `json:"quote_count"`
|
||||
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
|
||||
RetweetedStatusID int64
|
||||
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
|
||||
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
|
||||
QuotedStatusID int64
|
||||
QuotedStatusPermalink struct {
|
||||
ShortURL string `json:"url"`
|
||||
ExpandedURL string `json:"expanded"`
|
||||
} `json:"quoted_status_permalink"`
|
||||
Time time.Time `json:"time"`
|
||||
UserID int64 `json:"user_id_str,string"`
|
||||
UserHandle string
|
||||
Card APICard `json:"card"`
|
||||
TombstoneText string
|
||||
Time time.Time `json:"time"`
|
||||
UserID int64 `json:"user_id_str,string"`
|
||||
UserHandle string
|
||||
Card APICard `json:"card"`
|
||||
TombstoneText string
|
||||
}
|
||||
|
||||
func (t *APITweet) NormalizeContent() {
|
||||
@ -183,7 +183,7 @@ func (t *APITweet) NormalizeContent() {
|
||||
t.RetweetedStatusID = int64(id)
|
||||
}
|
||||
|
||||
if (len(t.DisplayTextRange) == 2) {
|
||||
if len(t.DisplayTextRange) == 2 {
|
||||
t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
|
||||
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
|
||||
}
|
||||
@ -217,7 +217,6 @@ func (t APITweet) String() string {
|
||||
return string(data)
|
||||
}
|
||||
|
||||
|
||||
type APIUser struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
Description string `json:"description"`
|
||||
@ -235,7 +234,7 @@ type APIUser struct {
|
||||
ListedCount int `json:"listed_count"`
|
||||
Name string `json:"name"`
|
||||
Location string `json:"location"`
|
||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
|
||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
|
||||
ProfileBannerURL string `json:"profile_banner_url"`
|
||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||
Protected bool `json:"protected"`
|
||||
@ -246,7 +245,6 @@ type APIUser struct {
|
||||
DoesntExist bool
|
||||
}
|
||||
|
||||
|
||||
type UserResponse struct {
|
||||
Data struct {
|
||||
User struct {
|
||||
@ -255,11 +253,12 @@ type UserResponse struct {
|
||||
} `json:"user"`
|
||||
} `json:"data"`
|
||||
Errors []struct {
|
||||
Message string `json:"message"`
|
||||
Name string `json:"name"`
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Name string `json:"name"`
|
||||
Code int `json:"code"`
|
||||
} `json:"errors"`
|
||||
}
|
||||
|
||||
func (u UserResponse) ConvertToAPIUser() APIUser {
|
||||
ret := u.Data.User.Legacy
|
||||
ret.ID = u.Data.User.ID
|
||||
@ -279,9 +278,9 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
Item struct {
|
||||
Content struct {
|
||||
Tombstone struct {
|
||||
@ -303,12 +302,15 @@ type Entry struct {
|
||||
} `json:"operation"`
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
// GetTombstoneText returns the string stored at
// Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text of this entry.
// Callers such as HandleTombstones compare the result against "" to decide
// whether the entry is a tombstone.
func (e Entry) GetTombstoneText() string {
|
||||
	return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
|
||||
}
|
||||
|
||||
type SortableEntries []Entry
|
||||
func (e SortableEntries) Len() int { return len(e) }
|
||||
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
|
||||
|
||||
func (e SortableEntries) Len() int { return len(e) }
|
||||
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
|
||||
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
|
||||
|
||||
type TweetResponse struct {
|
||||
@ -329,15 +331,16 @@ type TweetResponse struct {
|
||||
}
|
||||
|
||||
// tombstone_types maps the full tombstone text of an entry (including the
// trailing "Learn more") to the short name used for that kind of tombstone.
//
// The keys must match the incoming text exactly, so the apostrophes in
// "You’re" and "you’ll" are the typographic U+2019 character, not an ASCII
// quote and not a mis-decoded byte sequence.
var tombstone_types = map[string]string{
	"This Tweet was deleted by the Tweet author. Learn more": "deleted",
	"This Tweet is from a suspended account. Learn more":     "suspended",
	"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden",
	"This Tweet is unavailable. Learn more":                           "unavailable",
	"This Tweet violated the Twitter Rules. Learn more":               "violated",
	"This Tweet is from an account that no longer exists. Learn more": "no longer exists",
	"Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " +
		"you’ll need to log in to Twitter. Learn more": "age-restricted",
}
|
||||
|
||||
/**
|
||||
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
|
||||
* be fetched for tombstones.
|
||||
@ -379,8 +382,8 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
if entry.GetTombstoneText() != "" {
|
||||
// Try to reconstruct the tombstone tweet
|
||||
var tombstoned_tweet APITweet
|
||||
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
|
||||
if i + 1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
|
||||
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
|
||||
if i+1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
|
||||
next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID
|
||||
api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)]
|
||||
if !ok {
|
||||
@ -390,7 +393,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
tombstoned_tweet.UserID = api_tweet.InReplyToUserID
|
||||
ret = append(ret, UserHandle(api_tweet.InReplyToScreenName))
|
||||
}
|
||||
if i - 1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
|
||||
if i-1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
|
||||
prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID
|
||||
_, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)]
|
||||
if !ok {
|
||||
@ -416,7 +419,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
func (t *TweetResponse) GetCursor() string {
|
||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||
if len(entries) > 0 {
|
||||
last_entry := entries[len(entries) - 1]
|
||||
last_entry := entries[len(entries)-1]
|
||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||
return last_entry.Content.Operation.Cursor.Value
|
||||
}
|
||||
@ -424,7 +427,7 @@ func (t *TweetResponse) GetCursor() string {
|
||||
|
||||
// Next, try the other format ("replaceEntry")
|
||||
instructions := t.Timeline.Instructions
|
||||
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
|
||||
last_replace_entry := instructions[len(instructions)-1].ReplaceEntry.Entry
|
||||
if strings.Contains(last_replace_entry.EntryID, "cursor") {
|
||||
return last_replace_entry.Content.Operation.Cursor.Value
|
||||
}
|
||||
@ -450,7 +453,6 @@ func (t *TweetResponse) IsEndOfFeed() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
func idstr_to_int(idstr string) int64 {
|
||||
id, err := strconv.Atoi(idstr)
|
||||
if err != nil {
|
||||
|
@ -1,9 +1,9 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -11,17 +11,16 @@ import (
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
|
||||
func TestNormalizeContent(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
test_cases := []struct {
|
||||
filename string
|
||||
eventual_full_text string
|
||||
quoted_status_id TweetID
|
||||
in_reply_to_id TweetID
|
||||
filename string
|
||||
eventual_full_text string
|
||||
quoted_status_id TweetID
|
||||
in_reply_to_id TweetID
|
||||
retweeted_status_id TweetID
|
||||
reply_mentions string
|
||||
} {
|
||||
reply_mentions string
|
||||
}{
|
||||
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
|
||||
{"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""},
|
||||
{"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"},
|
||||
@ -48,7 +47,7 @@ func TestNormalizeContent(t *testing.T) {
|
||||
}
|
||||
var tweet APITweet
|
||||
err = json.Unmarshal(data, &tweet)
|
||||
assert.NoError(err, "Failed at " + v.filename)
|
||||
assert.NoError(err, "Failed at "+v.filename)
|
||||
|
||||
tweet.NormalizeContent()
|
||||
|
||||
@ -60,7 +59,6 @@ func TestNormalizeContent(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func TestUserProfileToAPIUser(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
|
||||
@ -76,7 +74,6 @@ func TestUserProfileToAPIUser(t *testing.T) {
|
||||
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
|
||||
}
|
||||
|
||||
|
||||
func TestGetCursor(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
|
||||
@ -91,13 +88,12 @@ func TestGetCursor(t *testing.T) {
|
||||
tweet_resp.GetCursor())
|
||||
}
|
||||
|
||||
|
||||
func TestIsEndOfFeed(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
test_cases := []struct {
|
||||
filename string
|
||||
filename string
|
||||
is_end_of_feed bool
|
||||
} {
|
||||
}{
|
||||
{"test_responses/michael_malice_feed.json", false},
|
||||
{"test_responses/kwiber_end_of_feed.json", true},
|
||||
}
|
||||
@ -113,7 +109,6 @@ func TestIsEndOfFeed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func TestHandleTombstonesHidden(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
|
||||
|
@ -13,13 +13,13 @@ import (
|
||||
)
|
||||
|
||||
type CardValue struct {
|
||||
Type string `json:"type"`
|
||||
Type string `json:"type"`
|
||||
StringValue string `json:"string_value"`
|
||||
ImageValue struct {
|
||||
ImageValue struct {
|
||||
AltText string `json:"alt"`
|
||||
Height int `json:"height"`
|
||||
Width int `json:"width"`
|
||||
Url string `json:"url"`
|
||||
Height int `json:"height"`
|
||||
Width int `json:"width"`
|
||||
Url string `json:"url"`
|
||||
} `json:"image_value"`
|
||||
UserValue struct {
|
||||
ID int64 `json:"id_str,string"`
|
||||
@ -30,13 +30,14 @@ type CardValue struct {
|
||||
type APIV2Card struct {
|
||||
Legacy struct {
|
||||
BindingValues []struct {
|
||||
Key string `json:"key"`
|
||||
Key string `json:"key"`
|
||||
Value CardValue `json:"value"`
|
||||
} `json:"binding_values"`
|
||||
Name string `json:"name"`
|
||||
Url string `json:"url"`
|
||||
Url string `json:"url"`
|
||||
} `json:"legacy"`
|
||||
}
|
||||
|
||||
func (card APIV2Card) ParseAsUrl() Url {
|
||||
values := make(map[string]CardValue)
|
||||
for _, obj := range card.Legacy.BindingValues {
|
||||
@ -121,6 +122,7 @@ type APIV2UserResult struct {
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
}
|
||||
|
||||
func (u APIV2UserResult) ToUser() User {
|
||||
user, err := ParseSingleUser(u.UserResults.Result.Legacy)
|
||||
if err != nil {
|
||||
@ -131,16 +133,16 @@ func (u APIV2UserResult) ToUser() User {
|
||||
}
|
||||
|
||||
type _Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
Tombstone *struct {
|
||||
Text struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"text"`
|
||||
} `json:"tombstone"`
|
||||
Core *APIV2UserResult `json:"core"`
|
||||
Card APIV2Card `json:"card"`
|
||||
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
|
||||
Core *APIV2UserResult `json:"core"`
|
||||
Card APIV2Card `json:"card"`
|
||||
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
|
||||
}
|
||||
|
||||
type APIV2Result struct {
|
||||
@ -149,11 +151,12 @@ type APIV2Result struct {
|
||||
Tweet _Result `json:"tweet"`
|
||||
} `json:"result"`
|
||||
}
|
||||
|
||||
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
||||
ret := NewTweetTrove()
|
||||
|
||||
// Start by checking if this is a null entry in a feed
|
||||
if api_result.Result.Tombstone != nil && ignore_null_entries{
|
||||
if api_result.Result.Tombstone != nil && ignore_null_entries {
|
||||
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
||||
return ret
|
||||
}
|
||||
@ -221,7 +224,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
|
||||
continue
|
||||
}
|
||||
found = true
|
||||
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
|
||||
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
|
||||
main_tweet.Urls[i] = url
|
||||
}
|
||||
if !found {
|
||||
@ -245,6 +248,7 @@ type APIV2Tweet struct {
|
||||
RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"`
|
||||
APITweet
|
||||
}
|
||||
|
||||
func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
||||
ret := NewTweetTrove()
|
||||
|
||||
@ -253,7 +257,6 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
||||
ret.MergeWith(orig_tweet_trove)
|
||||
|
||||
|
||||
retweet := Retweet{}
|
||||
var err error
|
||||
retweet.RetweetID = TweetID(api_v2_tweet.ID)
|
||||
@ -277,25 +280,24 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
||||
}
|
||||
|
||||
type APIV2Entry struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
ItemContent struct {
|
||||
EntryType string `json:"entryType"`
|
||||
EntryType string `json:"entryType"`
|
||||
TweetResults APIV2Result `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
|
||||
// Cursors
|
||||
EntryType string `json:"entryType"`
|
||||
Value string `json:"value"`
|
||||
EntryType string `json:"entryType"`
|
||||
Value string `json:"value"`
|
||||
CursorType string `json:"cursorType"`
|
||||
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
type APIV2Instruction struct {
|
||||
Type string `json:"type"`
|
||||
Entries []APIV2Entry`json:"entries"`
|
||||
Type string `json:"type"`
|
||||
Entries []APIV2Entry `json:"entries"`
|
||||
}
|
||||
|
||||
type APIV2Response struct {
|
||||
@ -324,7 +326,7 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
|
||||
|
||||
func (api_response APIV2Response) GetCursorBottom() string {
|
||||
entries := api_response.GetMainInstruction().Entries
|
||||
last_entry := entries[len(entries) - 1]
|
||||
last_entry := entries[len(entries)-1]
|
||||
if last_entry.Content.CursorType != "Bottom" {
|
||||
panic("No bottom cursor found")
|
||||
}
|
||||
@ -349,7 +351,7 @@ func (api_response APIV2Response) IsEmpty() bool {
|
||||
*/
|
||||
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||
ret := NewTweetTrove()
|
||||
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
||||
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
||||
if !strings.HasPrefix(entry.EntryID, "tweet-") {
|
||||
continue
|
||||
}
|
||||
@ -363,12 +365,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
|
||||
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
||||
if cursor != "" {
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||
}
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||
}
|
||||
|
||||
/**
|
||||
@ -446,7 +447,7 @@ func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Respo
|
||||
}
|
||||
if fresh_response.IsEmpty() {
|
||||
// Response has a pinned tweet, but no other content: end of feed has been reached
|
||||
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
|
||||
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
|
||||
}
|
||||
|
||||
last_response = &fresh_response
|
||||
|
@ -1,10 +1,10 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -34,7 +34,7 @@ func TestAPIV2ParseUser(t *testing.T) {
|
||||
assert.Equal(user.ID, UserID(44067298))
|
||||
assert.Equal(user.DisplayName, "Michael Malice")
|
||||
assert.Equal(user.Handle, UserHandle("michaelmalice"))
|
||||
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & " +
|
||||
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & "+
|
||||
"Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model")
|
||||
assert.Equal(user.FollowingCount, 964)
|
||||
assert.Equal(user.FollowersCount, 334571)
|
||||
@ -70,7 +70,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
|
||||
assert.True(ok)
|
||||
assert.Equal(tweet.ID, TweetID(1485708879174508550))
|
||||
assert.Equal(tweet.UserID, UserID(44067298))
|
||||
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a " +
|
||||
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a "+
|
||||
"row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964")
|
||||
assert.Equal(tweet.PostedAt.Unix(), int64(1643055574))
|
||||
assert.Equal(tweet.QuotedTweetID, TweetID(0))
|
||||
@ -133,7 +133,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
|
||||
assert.True(ok)
|
||||
assert.Equal(TweetID(1485690410899021826), quote_tweet.ID)
|
||||
assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID)
|
||||
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, " +
|
||||
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, "+
|
||||
"for example", quote_tweet.Text)
|
||||
|
||||
// Should be 2 users: quoter and quoted
|
||||
@ -182,7 +182,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
||||
// Check the video
|
||||
v := tweet.Videos[0]
|
||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl)
|
||||
assert.Equal(0, v.ViewCount) // TODO: make this work
|
||||
assert.Equal(0, v.ViewCount) // TODO: make this work
|
||||
assert.Equal(720, v.Height)
|
||||
assert.Equal(720, v.Width)
|
||||
assert.Equal(30066, v.Duration)
|
||||
@ -200,7 +200,6 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
||||
assert.Equal(UserID(44067298), retweeting_user.ID)
|
||||
assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle)
|
||||
|
||||
|
||||
// Should be 1 retweet
|
||||
assert.Equal(1, len(trove.Retweets))
|
||||
retweet, ok := trove.Retweets[1485699748514476037]
|
||||
@ -270,7 +269,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
|
||||
assert.Equal(UserID(599817378), retweet.RetweetedByID)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse tweet with quoted tombstone
|
||||
*/
|
||||
@ -300,13 +298,12 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
|
||||
assert.True(ok)
|
||||
assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID)
|
||||
assert.Equal("no longer exists", tombstoned_tweet.TombstoneType)
|
||||
assert.True (tombstoned_tweet.IsStub)
|
||||
assert.True(tombstoned_tweet.IsStub)
|
||||
assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle)
|
||||
|
||||
assert.Equal(0, len(trove.Retweets))
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse a tweet with a link
|
||||
*/
|
||||
@ -326,7 +323,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
||||
assert.Equal(1, len(trove.Tweets))
|
||||
tweet, ok := trove.Tweets[1485695695025803264]
|
||||
assert.True(ok)
|
||||
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to " +
|
||||
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to "+
|
||||
"show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)
|
||||
|
||||
assert.Equal(1, len(tweet.Urls))
|
||||
@ -335,7 +332,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
||||
assert.Equal("observer.com", url.Domain)
|
||||
assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
|
||||
assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
|
||||
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to " +
|
||||
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to "+
|
||||
"others that one is a good person without having to do anything", url.Description)
|
||||
assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||
assert.Equal(600, url.ThumbnailWidth)
|
||||
@ -439,10 +436,9 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
|
||||
|
||||
assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix())
|
||||
assert.Equal(1440 * 60, poll.VotingDuration)
|
||||
assert.Equal(1440*60, poll.VotingDuration)
|
||||
}
|
||||
|
||||
|
||||
func TestParseAPIV2UserFeed(t *testing.T) {
|
||||
data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
|
||||
if err != nil {
|
||||
@ -495,7 +491,6 @@ func TestParseAPIV2UserFeed(t *testing.T) {
|
||||
fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets))
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Should correctly identify an "empty" response
|
||||
*/
|
||||
@ -562,13 +557,12 @@ func TestAPIV2TombstoneEntry(t *testing.T) {
|
||||
err = json.Unmarshal(data, &tweet_result)
|
||||
require.NoError(t, err)
|
||||
|
||||
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
||||
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
||||
assert.Len(trove.Tweets, 0)
|
||||
assert.Len(trove.Users, 0)
|
||||
assert.Len(trove.Retweets, 0)
|
||||
}
|
||||
|
||||
|
||||
func TestTweetWithWarning(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
||||
|
@ -1,29 +1,29 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"path"
|
||||
"path"
|
||||
)
|
||||
|
||||
type ImageID int64
|
||||
|
||||
type Image struct {
|
||||
ID ImageID
|
||||
TweetID TweetID
|
||||
Width int
|
||||
Height int
|
||||
RemoteURL string
|
||||
LocalFilename string
|
||||
IsDownloaded bool
|
||||
ID ImageID
|
||||
TweetID TweetID
|
||||
Width int
|
||||
Height int
|
||||
RemoteURL string
|
||||
LocalFilename string
|
||||
IsDownloaded bool
|
||||
}
|
||||
|
||||
func ParseAPIMedia(apiMedia APIMedia) Image {
|
||||
local_filename := path.Base(apiMedia.MediaURLHttps)
|
||||
return Image{
|
||||
ID: ImageID(apiMedia.ID),
|
||||
RemoteURL: apiMedia.MediaURLHttps,
|
||||
Width: apiMedia.OriginalInfo.Width,
|
||||
Height: apiMedia.OriginalInfo.Height,
|
||||
LocalFilename: local_filename,
|
||||
IsDownloaded: false,
|
||||
}
|
||||
local_filename := path.Base(apiMedia.MediaURLHttps)
|
||||
return Image{
|
||||
ID: ImageID(apiMedia.ID),
|
||||
RemoteURL: apiMedia.MediaURLHttps,
|
||||
Width: apiMedia.OriginalInfo.Width,
|
||||
Height: apiMedia.OriginalInfo.Height,
|
||||
LocalFilename: local_filename,
|
||||
IsDownloaded: false,
|
||||
}
|
||||
}
|
||||
|
@ -1,31 +1,31 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestParseAPIMedia(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apimedia APIMedia
|
||||
err = json.Unmarshal(data, &apimedia)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apimedia APIMedia
|
||||
err = json.Unmarshal(data, &apimedia)
|
||||
require.NoError(t, err)
|
||||
|
||||
image := ParseAPIMedia(apimedia)
|
||||
assert.Equal(ImageID(1395882862289772553), image.ID)
|
||||
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
||||
assert.Equal(593, image.Width)
|
||||
assert.Equal(239, image.Height)
|
||||
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
||||
assert.False(image.IsDownloaded)
|
||||
image := ParseAPIMedia(apimedia)
|
||||
assert.Equal(ImageID(1395882862289772553), image.ID)
|
||||
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
||||
assert.Equal(593, image.Width)
|
||||
assert.Equal(239, image.Height)
|
||||
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
||||
assert.False(image.IsDownloaded)
|
||||
}
|
||||
|
@ -2,11 +2,10 @@ package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
||||
/**
|
||||
* Return the expanded version of a short URL. Input must be a real short URL.
|
||||
*/
|
||||
@ -21,7 +20,7 @@ func ExpandShortUrl(short_url string) string {
|
||||
|
||||
resp, err := client.Get(short_url)
|
||||
if err != nil {
|
||||
panic(err) // TODO: handle timeouts
|
||||
panic(err) // TODO: handle timeouts
|
||||
}
|
||||
if resp.StatusCode != 301 {
|
||||
panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR))
|
||||
|
@ -6,12 +6,11 @@ import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
|
||||
func TestExpandShortUrl(t *testing.T) {
|
||||
redirecting_to := "redirect target"
|
||||
srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
|
||||
|
112
scraper/poll.go
112
scraper/poll.go
@ -1,82 +1,82 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"strconv"
|
||||
"net/url"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type PollID int64
|
||||
|
||||
type Poll struct {
|
||||
ID PollID
|
||||
TweetID TweetID
|
||||
NumChoices int
|
||||
ID PollID
|
||||
TweetID TweetID
|
||||
NumChoices int
|
||||
|
||||
Choice1 string
|
||||
Choice1_Votes int
|
||||
Choice2 string
|
||||
Choice2_Votes int
|
||||
Choice3 string
|
||||
Choice3_Votes int
|
||||
Choice4 string
|
||||
Choice4_Votes int
|
||||
Choice1 string
|
||||
Choice1_Votes int
|
||||
Choice2 string
|
||||
Choice2_Votes int
|
||||
Choice3 string
|
||||
Choice3_Votes int
|
||||
Choice4 string
|
||||
Choice4_Votes int
|
||||
|
||||
VotingDuration int // In seconds
|
||||
VotingEndsAt Timestamp
|
||||
VotingDuration int // In seconds
|
||||
VotingEndsAt Timestamp
|
||||
|
||||
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
||||
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
||||
}
|
||||
|
||||
func ParseAPIPoll(apiCard APICard) Poll {
|
||||
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
id := int_or_panic(card_url.Hostname())
|
||||
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
id := int_or_panic(card_url.Hostname())
|
||||
|
||||
ret := Poll{}
|
||||
ret.ID = PollID(id)
|
||||
ret.NumChoices = parse_num_choices(apiCard.Name)
|
||||
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
||||
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
ret := Poll{}
|
||||
ret.ID = PollID(id)
|
||||
ret.NumChoices = parse_num_choices(apiCard.Name)
|
||||
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
||||
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
||||
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
||||
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
||||
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
||||
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
||||
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
||||
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
||||
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
||||
|
||||
if ret.NumChoices > 2 {
|
||||
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
||||
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
||||
}
|
||||
if ret.NumChoices > 3 {
|
||||
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
||||
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
||||
}
|
||||
if ret.NumChoices > 2 {
|
||||
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
||||
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
||||
}
|
||||
if ret.NumChoices > 3 {
|
||||
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
||||
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
||||
}
|
||||
|
||||
return ret
|
||||
return ret
|
||||
}
|
||||
|
||||
func parse_num_choices(card_name string) int {
|
||||
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
||||
panic("Not valid card name: " + card_name)
|
||||
}
|
||||
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
||||
panic("Not valid card name: " + card_name)
|
||||
}
|
||||
|
||||
return int_or_panic(card_name[4:5])
|
||||
return int_or_panic(card_name[4:5])
|
||||
}
|
||||
|
||||
func int_or_panic(s string) int {
|
||||
result, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return result
|
||||
result, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
@ -1,67 +1,67 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestParsePoll2Choices(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
poll := ParseAPIPoll(apiCard)
|
||||
assert.Equal(PollID(1457419248461131776), poll.ID)
|
||||
assert.Equal(2, poll.NumChoices)
|
||||
assert.Equal(60 * 60 * 24, poll.VotingDuration)
|
||||
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
||||
poll := ParseAPIPoll(apiCard)
|
||||
assert.Equal(PollID(1457419248461131776), poll.ID)
|
||||
assert.Equal(2, poll.NumChoices)
|
||||
assert.Equal(60*60*24, poll.VotingDuration)
|
||||
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
||||
|
||||
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||
assert.Equal("Yes", poll.Choice1)
|
||||
assert.Equal("No", poll.Choice2)
|
||||
assert.Equal(529, poll.Choice1_Votes)
|
||||
assert.Equal(2182, poll.Choice2_Votes)
|
||||
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||
assert.Equal("Yes", poll.Choice1)
|
||||
assert.Equal("No", poll.Choice2)
|
||||
assert.Equal(529, poll.Choice1_Votes)
|
||||
assert.Equal(2182, poll.Choice2_Votes)
|
||||
}
|
||||
|
||||
func TestParsePoll4Choices(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
poll := ParseAPIPoll(apiCard)
|
||||
assert.Equal(PollID(1455611588854140929), poll.ID)
|
||||
assert.Equal(4, poll.NumChoices)
|
||||
assert.Equal(60 * 60 * 24, poll.VotingDuration)
|
||||
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
||||
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||
poll := ParseAPIPoll(apiCard)
|
||||
assert.Equal(PollID(1455611588854140929), poll.ID)
|
||||
assert.Equal(4, poll.NumChoices)
|
||||
assert.Equal(60*60*24, poll.VotingDuration)
|
||||
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
||||
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||
|
||||
assert.Equal("Alec Baldwin", poll.Choice1)
|
||||
assert.Equal(1669, poll.Choice1_Votes)
|
||||
assert.Equal("Alec Baldwin", poll.Choice1)
|
||||
assert.Equal(1669, poll.Choice1_Votes)
|
||||
|
||||
assert.Equal("Andew Cuomo", poll.Choice2)
|
||||
assert.Equal(272, poll.Choice2_Votes)
|
||||
assert.Equal("Andew Cuomo", poll.Choice2)
|
||||
assert.Equal(272, poll.Choice2_Votes)
|
||||
|
||||
assert.Equal("George Floyd", poll.Choice3)
|
||||
assert.Equal(829, poll.Choice3_Votes)
|
||||
assert.Equal("George Floyd", poll.Choice3)
|
||||
assert.Equal(829, poll.Choice3_Votes)
|
||||
|
||||
assert.Equal("Derek Chauvin", poll.Choice4)
|
||||
assert.Equal(2397, poll.Choice4_Votes)
|
||||
assert.Equal("Derek Chauvin", poll.Choice4)
|
||||
assert.Equal(2397, poll.Choice4_Votes)
|
||||
}
|
||||
|
@ -1,12 +1,12 @@
|
||||
package scraper
|
||||
|
||||
type Retweet struct {
|
||||
RetweetID TweetID
|
||||
TweetID TweetID
|
||||
Tweet *Tweet
|
||||
RetweetedByID UserID `db:"retweeted_by"`
|
||||
RetweetedBy *User
|
||||
RetweetedAt Timestamp
|
||||
RetweetID TweetID
|
||||
TweetID TweetID
|
||||
Tweet *Tweet
|
||||
RetweetedByID UserID `db:"retweeted_by"`
|
||||
RetweetedBy *User
|
||||
RetweetedAt Timestamp
|
||||
}
|
||||
|
||||
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
|
||||
|
@ -5,8 +5,8 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
@ -5,7 +5,7 @@ import (
|
||||
)
|
||||
|
||||
func TimestampToDateString(timestamp int) string {
|
||||
panic("???") // TODO
|
||||
panic("???") // TODO
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,9 +1,9 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"time"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"offline_twitter/terminal_utils"
|
||||
)
|
||||
@ -13,18 +13,18 @@ const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50
|
||||
type TweetID int64
|
||||
|
||||
type Tweet struct {
|
||||
ID TweetID
|
||||
UserID UserID
|
||||
UserHandle UserHandle // For processing tombstones
|
||||
User *User
|
||||
Text string
|
||||
PostedAt Timestamp
|
||||
NumLikes int
|
||||
NumRetweets int
|
||||
NumReplies int
|
||||
NumQuoteTweets int
|
||||
InReplyToID TweetID
|
||||
QuotedTweetID TweetID
|
||||
ID TweetID
|
||||
UserID UserID
|
||||
UserHandle UserHandle // For processing tombstones
|
||||
User *User
|
||||
Text string
|
||||
PostedAt Timestamp
|
||||
NumLikes int
|
||||
NumRetweets int
|
||||
NumReplies int
|
||||
NumQuoteTweets int
|
||||
InReplyToID TweetID
|
||||
QuotedTweetID TweetID
|
||||
|
||||
Images []Image
|
||||
Videos []Video
|
||||
@ -35,14 +35,13 @@ type Tweet struct {
|
||||
Polls []Poll
|
||||
|
||||
TombstoneType string
|
||||
IsStub bool
|
||||
IsStub bool
|
||||
|
||||
IsContentDownloaded bool
|
||||
IsContentDownloaded bool
|
||||
IsConversationScraped bool
|
||||
LastScrapedAt Timestamp
|
||||
LastScrapedAt Timestamp
|
||||
}
|
||||
|
||||
|
||||
func (t Tweet) String() string {
|
||||
var author string
|
||||
if t.User != nil {
|
||||
@ -52,7 +51,7 @@ func (t Tweet) String() string {
|
||||
}
|
||||
|
||||
ret := fmt.Sprintf(
|
||||
`%s
|
||||
`%s
|
||||
%s
|
||||
%s
|
||||
Replies: %d RT: %d QT: %d Likes: %d
|
||||
@ -67,11 +66,11 @@ Replies: %d RT: %d QT: %d Likes: %d
|
||||
)
|
||||
|
||||
if len(t.Images) > 0 {
|
||||
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN + "images: %d\n" + terminal_utils.COLOR_RESET, len(t.Images))
|
||||
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
|
||||
}
|
||||
if len(t.Urls) > 0 {
|
||||
ret += "urls: [\n"
|
||||
for _, url := range(t.Urls) {
|
||||
for _, url := range t.Urls {
|
||||
ret += " " + url.Text + "\n"
|
||||
}
|
||||
ret += "]"
|
||||
@ -90,7 +89,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
ret.Text = apiTweet.FullText
|
||||
|
||||
// Process "posted-at" date and time
|
||||
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
|
||||
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
|
||||
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
||||
if err != nil {
|
||||
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
||||
@ -125,7 +124,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
|
||||
// Process images
|
||||
for _, media := range apiTweet.Entities.Media {
|
||||
if media.Type != "photo" { // TODO: remove this eventually
|
||||
if media.Type != "photo" { // TODO: remove this eventually
|
||||
panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR))
|
||||
}
|
||||
new_image := ParseAPIMedia(media)
|
||||
@ -151,7 +150,6 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Process videos
|
||||
for _, entity := range apiTweet.ExtendedEntities.Media {
|
||||
if entity.Type != "video" && entity.Type != "animated_gif" {
|
||||
@ -175,13 +173,12 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
||||
// Process tombstones and other metadata
|
||||
ret.TombstoneType = apiTweet.TombstoneText
|
||||
ret.IsStub = !(ret.TombstoneType == "")
|
||||
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
||||
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
||||
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get a single tweet with no replies from the API.
|
||||
*
|
||||
@ -206,7 +203,6 @@ func GetTweet(id TweetID) (Tweet, error) {
|
||||
return ParseSingleTweet(single_tweet)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a list of tweets, including the original and the rest of its thread,
|
||||
* along with a list of associated users.
|
||||
@ -227,7 +223,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
||||
return
|
||||
}
|
||||
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
|
||||
tweet_response.GetCursor() != "" {
|
||||
tweet_response.GetCursor() != "" {
|
||||
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
|
||||
|
@ -5,13 +5,13 @@ import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func load_tweet_from_file(filename string) Tweet{
|
||||
func load_tweet_from_file(filename string) Tweet {
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -28,12 +28,11 @@ func load_tweet_from_file(filename string) Tweet{
|
||||
return tweet
|
||||
}
|
||||
|
||||
|
||||
func TestParseSingleTweet(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
|
||||
|
||||
assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the " +
|
||||
assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the "+
|
||||
"largest white pill I’ve swallowed in years.", tweet.Text)
|
||||
assert.Len(tweet.Mentions, 1)
|
||||
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
|
||||
@ -73,7 +72,7 @@ func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
|
||||
|
||||
assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the " +
|
||||
assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the "+
|
||||
"weasels and rats running American industry today?", tweet.Text)
|
||||
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
|
||||
|
||||
@ -135,7 +134,7 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
|
||||
|
||||
assert.False(tweet.Urls[0].HasCard)
|
||||
assert.False(tweet.Urls[1].HasCard)
|
||||
assert.True (tweet.Urls[2].HasCard)
|
||||
assert.True(tweet.Urls[2].HasCard)
|
||||
|
||||
assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title)
|
||||
}
|
||||
@ -166,12 +165,11 @@ func TestTweetWithPoll(t *testing.T) {
|
||||
assert.Equal(624, p.Choice2_Votes)
|
||||
assert.Equal(778, p.Choice3_Votes)
|
||||
assert.Equal(1138, p.Choice4_Votes)
|
||||
assert.Equal(1440 * 60, p.VotingDuration)
|
||||
assert.Equal(1440*60, p.VotingDuration)
|
||||
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
|
||||
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
|
||||
}
|
||||
|
||||
|
||||
func TestParseTweetResponse(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
|
||||
@ -186,7 +184,7 @@ func TestParseTweetResponse(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
tweets, retweets, users := trove.Transform()
|
||||
|
||||
assert.Len(tweets, 29 - 3)
|
||||
assert.Len(tweets, 29-3)
|
||||
assert.Len(retweets, 3)
|
||||
assert.Len(users, 9)
|
||||
}
|
||||
|
@ -8,9 +8,9 @@ import (
|
||||
)
|
||||
|
||||
type TweetTrove struct {
|
||||
Tweets map[TweetID]Tweet
|
||||
Users map[UserID]User
|
||||
Retweets map[TweetID]Retweet
|
||||
Tweets map[TweetID]Tweet
|
||||
Users map[UserID]User
|
||||
Retweets map[TweetID]Retweet
|
||||
|
||||
TombstoneUsers []UserHandle
|
||||
}
|
||||
@ -38,7 +38,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [
|
||||
retweets = append(retweets, val)
|
||||
}
|
||||
return
|
||||
} // TODO: refactor until this function isn't needed anymore
|
||||
} // TODO: refactor until this function isn't needed anymore
|
||||
|
||||
/**
|
||||
* Search for a user by handle. Second param is whether the user was found or not.
|
||||
|
@ -2,28 +2,28 @@ package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
type Url struct {
|
||||
TweetID TweetID
|
||||
|
||||
Domain string
|
||||
Text string
|
||||
ShortText string
|
||||
Title string
|
||||
Description string
|
||||
ThumbnailWidth int
|
||||
ThumbnailHeight int
|
||||
Domain string
|
||||
Text string
|
||||
ShortText string
|
||||
Title string
|
||||
Description string
|
||||
ThumbnailWidth int
|
||||
ThumbnailHeight int
|
||||
ThumbnailRemoteUrl string
|
||||
ThumbnailLocalPath string
|
||||
CreatorID UserID
|
||||
SiteID UserID
|
||||
CreatorID UserID
|
||||
SiteID UserID
|
||||
|
||||
HasCard bool
|
||||
HasThumbnail bool
|
||||
HasCard bool
|
||||
HasThumbnail bool
|
||||
IsContentDownloaded bool
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ func TryParseTweetUrl(url string) (UserHandle, TweetID, bool) {
|
||||
if matches == nil {
|
||||
return UserHandle(""), TweetID(0), false
|
||||
}
|
||||
if len(matches) != 3 { // matches[0] is the full string
|
||||
if len(matches) != 3 { // matches[0] is the full string
|
||||
panic(matches)
|
||||
}
|
||||
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
|
||||
|
@ -1,153 +1,153 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestParseAPIUrlCard(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("reason.com", url.Domain)
|
||||
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
||||
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned " +
|
||||
"resilience.\"", url.Description)
|
||||
assert.Equal(600, url.ThumbnailWidth)
|
||||
assert.Equal(315, url.ThumbnailHeight)
|
||||
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(155581583), url.CreatorID)
|
||||
assert.Equal(UserID(16467567), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("reason.com", url.Domain)
|
||||
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
||||
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
|
||||
"resilience.\"", url.Description)
|
||||
assert.Equal(600, url.ThumbnailWidth)
|
||||
assert.Equal(315, url.ThumbnailHeight)
|
||||
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(155581583), url.CreatorID)
|
||||
assert.Equal(UserID(16467567), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
}
|
||||
|
||||
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("www.youtube.com", url.Domain)
|
||||
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
||||
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8" +
|
||||
"Watch this episode on Rumble: https://rumble...", url.Description)
|
||||
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(10228272), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("www.youtube.com", url.Domain)
|
||||
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
||||
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
|
||||
"Watch this episode on Rumble: https://rumble...", url.Description)
|
||||
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(10228272), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
}
|
||||
|
||||
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("www.youtube.com", url.Domain)
|
||||
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
||||
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________" +
|
||||
"__________________________________________...", url.Description)
|
||||
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(10228272), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("www.youtube.com", url.Domain)
|
||||
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
||||
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
|
||||
"__________________________________________...", url.Description)
|
||||
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
||||
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
||||
assert.Equal(UserID(10228272), url.SiteID)
|
||||
assert.True(url.HasThumbnail)
|
||||
assert.False(url.IsContentDownloaded)
|
||||
}
|
||||
|
||||
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
require.NoError(t, err)
|
||||
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("en.m.wikipedia.org", url.Domain)
|
||||
assert.Equal("Entryism - Wikipedia", url.Title)
|
||||
assert.Equal("", url.Description)
|
||||
assert.True(url.HasCard)
|
||||
assert.False(url.HasThumbnail)
|
||||
url := ParseAPIUrlCard(apiCard)
|
||||
assert.Equal("en.m.wikipedia.org", url.Domain)
|
||||
assert.Equal("Entryism - Wikipedia", url.Title)
|
||||
assert.Equal("", url.Description)
|
||||
assert.True(url.HasCard)
|
||||
assert.False(url.HasThumbnail)
|
||||
}
|
||||
|
||||
/**
|
||||
* Should check if a url is a tweet url, and if so, parse it
|
||||
*/
|
||||
func TestParseTweetUrl(t *testing.T) {
|
||||
assert:= assert.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
// Test valid tweet url
|
||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||
handle, id, is_ok := TryParseTweetUrl(url)
|
||||
assert.True(is_ok)
|
||||
assert.Equal(UserHandle("kanesays23"), handle)
|
||||
assert.Equal(TweetID(1429583672827465730), id)
|
||||
// Test valid tweet url
|
||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||
handle, id, is_ok := TryParseTweetUrl(url)
|
||||
assert.True(is_ok)
|
||||
assert.Equal(UserHandle("kanesays23"), handle)
|
||||
assert.Equal(TweetID(1429583672827465730), id)
|
||||
|
||||
// Test url with GET params
|
||||
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||
assert.True(is_ok)
|
||||
assert.Equal(UserHandle("NerdNoticing"), handle)
|
||||
assert.Equal(TweetID(1263192389050654720), id)
|
||||
// Test url with GET params
|
||||
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||
assert.True(is_ok)
|
||||
assert.Equal(UserHandle("NerdNoticing"), handle)
|
||||
assert.Equal(TweetID(1263192389050654720), id)
|
||||
|
||||
// Test invalid url
|
||||
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||
assert.False(is_ok)
|
||||
// Test invalid url
|
||||
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||
assert.False(is_ok)
|
||||
|
||||
// Test empty string
|
||||
_, _, is_ok = TryParseTweetUrl("")
|
||||
assert.False(is_ok)
|
||||
// Test empty string
|
||||
_, _, is_ok = TryParseTweetUrl("")
|
||||
assert.False(is_ok)
|
||||
}
|
||||
|
||||
/**
|
||||
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
||||
*/
|
||||
func TestParseHandleFromTweetUrl(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
assert := assert.New(t)
|
||||
|
||||
// Test valid tweet url
|
||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||
result, err := ParseHandleFromTweetUrl(url)
|
||||
assert.NoError(err)
|
||||
assert.Equal(UserHandle("kanesays23"), result)
|
||||
// Test valid tweet url
|
||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||
result, err := ParseHandleFromTweetUrl(url)
|
||||
assert.NoError(err)
|
||||
assert.Equal(UserHandle("kanesays23"), result)
|
||||
|
||||
// Test url with GET params
|
||||
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||
assert.NoError(err)
|
||||
assert.Equal(UserHandle("NerdNoticing"), result)
|
||||
// Test url with GET params
|
||||
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||
assert.NoError(err)
|
||||
assert.Equal(UserHandle("NerdNoticing"), result)
|
||||
|
||||
// Test invalid url
|
||||
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||
assert.Error(err)
|
||||
// Test invalid url
|
||||
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||
assert.Error(err)
|
||||
|
||||
// Test empty string
|
||||
_, err = ParseHandleFromTweetUrl("")
|
||||
assert.Error(err)
|
||||
// Test empty string
|
||||
_, err = ParseHandleFromTweetUrl("")
|
||||
assert.Error(err)
|
||||
}
|
||||
|
292
scraper/user.go
292
scraper/user.go
@ -1,12 +1,12 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"regexp"
|
||||
"path"
|
||||
"fmt"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"offline_twitter/terminal_utils"
|
||||
"offline_twitter/terminal_utils"
|
||||
)
|
||||
|
||||
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
|
||||
@ -15,47 +15,48 @@ type UserID int64
|
||||
type UserHandle string
|
||||
|
||||
func JoinArrayOfHandles(handles []UserHandle) string {
|
||||
ret := []string{}
|
||||
for _, h := range handles {
|
||||
ret = append(ret, string(h))
|
||||
}
|
||||
return strings.Join(ret, ",")
|
||||
ret := []string{}
|
||||
for _, h := range handles {
|
||||
ret = append(ret, string(h))
|
||||
}
|
||||
return strings.Join(ret, ",")
|
||||
}
|
||||
|
||||
type User struct {
|
||||
ID UserID
|
||||
DisplayName string
|
||||
Handle UserHandle
|
||||
Bio string
|
||||
FollowingCount int
|
||||
FollowersCount int
|
||||
Location string
|
||||
Website string
|
||||
JoinDate Timestamp
|
||||
IsPrivate bool
|
||||
IsVerified bool
|
||||
IsBanned bool
|
||||
ProfileImageUrl string
|
||||
ProfileImageLocalPath string
|
||||
BannerImageUrl string
|
||||
BannerImageLocalPath string
|
||||
ID UserID
|
||||
DisplayName string
|
||||
Handle UserHandle
|
||||
Bio string
|
||||
FollowingCount int
|
||||
FollowersCount int
|
||||
Location string
|
||||
Website string
|
||||
JoinDate Timestamp
|
||||
IsPrivate bool
|
||||
IsVerified bool
|
||||
IsBanned bool
|
||||
IsDeleted bool
|
||||
ProfileImageUrl string
|
||||
ProfileImageLocalPath string
|
||||
BannerImageUrl string
|
||||
BannerImageLocalPath string
|
||||
|
||||
PinnedTweetID TweetID
|
||||
PinnedTweet *Tweet
|
||||
PinnedTweetID TweetID
|
||||
PinnedTweet *Tweet
|
||||
|
||||
IsFollowed bool
|
||||
IsContentDownloaded bool
|
||||
IsNeedingFakeID bool
|
||||
IsIdFake bool
|
||||
IsFollowed bool
|
||||
IsContentDownloaded bool
|
||||
IsNeedingFakeID bool
|
||||
IsIdFake bool
|
||||
}
|
||||
|
||||
func (u User) String() string {
|
||||
var verified string
|
||||
if u.IsVerified {
|
||||
verified = "[\u2713]"
|
||||
}
|
||||
ret := fmt.Sprintf(
|
||||
`%s%s
|
||||
var verified string
|
||||
if u.IsVerified {
|
||||
verified = "[\u2713]"
|
||||
}
|
||||
ret := fmt.Sprintf(
|
||||
`%s%s
|
||||
@%s
|
||||
%s
|
||||
|
||||
@ -65,115 +66,112 @@ Joined %s
|
||||
%s
|
||||
%s
|
||||
`,
|
||||
u.DisplayName,
|
||||
verified,
|
||||
u.Handle,
|
||||
terminal_utils.WrapText(u.Bio, 60),
|
||||
u.FollowingCount,
|
||||
u.FollowersCount,
|
||||
terminal_utils.FormatDate(u.JoinDate.Time),
|
||||
u.Location,
|
||||
u.Website,
|
||||
)
|
||||
if u.PinnedTweet != nil {
|
||||
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
|
||||
} else {
|
||||
println("Pinned tweet id:", u.PinnedTweetID)
|
||||
}
|
||||
return ret
|
||||
u.DisplayName,
|
||||
verified,
|
||||
u.Handle,
|
||||
terminal_utils.WrapText(u.Bio, 60),
|
||||
u.FollowingCount,
|
||||
u.FollowersCount,
|
||||
terminal_utils.FormatDate(u.JoinDate.Time),
|
||||
u.Location,
|
||||
u.Website,
|
||||
)
|
||||
if u.PinnedTweet != nil {
|
||||
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
|
||||
} else {
|
||||
println("Pinned tweet id:", u.PinnedTweetID)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user`
|
||||
* subcommand or as part of tombstone user fetching.)
|
||||
*/
|
||||
func GetUnknownUserWithHandle(handle UserHandle) User {
|
||||
return User{
|
||||
ID: UserID(0), // 2^62 + 1...
|
||||
DisplayName: string(handle),
|
||||
Handle: handle,
|
||||
Bio: "<blank>",
|
||||
FollowersCount: 0,
|
||||
FollowingCount: 0,
|
||||
Location: "<blank>",
|
||||
Website:"<blank>",
|
||||
JoinDate: TimestampFromUnix(0),
|
||||
IsVerified: false,
|
||||
IsPrivate: false,
|
||||
IsNeedingFakeID: true,
|
||||
IsIdFake: true,
|
||||
}
|
||||
return User{
|
||||
ID: UserID(0), // 2^62 + 1...
|
||||
DisplayName: string(handle),
|
||||
Handle: handle,
|
||||
Bio: "<blank>",
|
||||
FollowersCount: 0,
|
||||
FollowingCount: 0,
|
||||
Location: "<blank>",
|
||||
Website: "<blank>",
|
||||
JoinDate: TimestampFromUnix(0),
|
||||
IsVerified: false,
|
||||
IsPrivate: false,
|
||||
IsNeedingFakeID: true,
|
||||
IsIdFake: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||
if apiUser.DoesntExist {
|
||||
// User may have been deleted, or there was a typo. There's no data to parse
|
||||
if apiUser.ScreenName == "" {
|
||||
panic("ScreenName is empty!")
|
||||
}
|
||||
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
||||
return
|
||||
}
|
||||
ret.ID = UserID(apiUser.ID)
|
||||
ret.Handle = UserHandle(apiUser.ScreenName)
|
||||
if apiUser.IsBanned {
|
||||
// Banned users won't have any further info, so just return here
|
||||
ret.IsBanned = true
|
||||
return
|
||||
}
|
||||
ret.DisplayName = apiUser.Name
|
||||
ret.Bio = apiUser.Description
|
||||
ret.FollowingCount = apiUser.FriendsCount
|
||||
ret.FollowersCount = apiUser.FollowersCount
|
||||
ret.Location = apiUser.Location
|
||||
if len(apiUser.Entities.URL.Urls) > 0 {
|
||||
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
||||
}
|
||||
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
||||
return
|
||||
}
|
||||
ret.IsPrivate = apiUser.Protected
|
||||
ret.IsVerified = apiUser.Verified
|
||||
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
||||
if apiUser.DoesntExist {
|
||||
// User may have been deleted, or there was a typo. There's no data to parse
|
||||
if apiUser.ScreenName == "" {
|
||||
panic("ScreenName is empty!")
|
||||
}
|
||||
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
||||
return
|
||||
}
|
||||
ret.ID = UserID(apiUser.ID)
|
||||
ret.Handle = UserHandle(apiUser.ScreenName)
|
||||
if apiUser.IsBanned {
|
||||
// Banned users won't have any further info, so just return here
|
||||
ret.IsBanned = true
|
||||
return
|
||||
}
|
||||
ret.DisplayName = apiUser.Name
|
||||
ret.Bio = apiUser.Description
|
||||
ret.FollowingCount = apiUser.FriendsCount
|
||||
ret.FollowersCount = apiUser.FollowersCount
|
||||
ret.Location = apiUser.Location
|
||||
if len(apiUser.Entities.URL.Urls) > 0 {
|
||||
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
||||
}
|
||||
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
||||
return
|
||||
}
|
||||
ret.IsPrivate = apiUser.Protected
|
||||
ret.IsVerified = apiUser.Verified
|
||||
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
||||
|
||||
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
||||
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
||||
}
|
||||
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
||||
|
||||
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
||||
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
||||
}
|
||||
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
||||
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
||||
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
||||
|
||||
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
||||
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
||||
|
||||
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
||||
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
||||
}
|
||||
return
|
||||
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
||||
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Calls API#GetUser and returns the parsed result
|
||||
func GetUser(handle UserHandle) (User, error) {
|
||||
api := API{}
|
||||
apiUser, err := api.GetUser(handle)
|
||||
if apiUser.ScreenName == "" {
|
||||
apiUser.ScreenName = string(handle)
|
||||
}
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return ParseSingleUser(apiUser)
|
||||
api := API{}
|
||||
apiUser, err := api.GetUser(handle)
|
||||
if apiUser.ScreenName == "" {
|
||||
apiUser.ScreenName = string(handle)
|
||||
}
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return ParseSingleUser(apiUser)
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a filename for the profile image, that hopefully won't clobber other ones
|
||||
*/
|
||||
func (u User) compute_profile_image_local_path() string {
|
||||
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
|
||||
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -182,34 +180,34 @@ func (u User) compute_profile_image_local_path() string {
|
||||
* If there is no banner image, just return nothing.
|
||||
*/
|
||||
func (u User) compute_banner_image_local_path() string {
|
||||
if u.BannerImageUrl == "" {
|
||||
return ""
|
||||
}
|
||||
base_name := path.Base(u.BannerImageUrl)
|
||||
if u.BannerImageUrl == "" {
|
||||
return ""
|
||||
}
|
||||
base_name := path.Base(u.BannerImageUrl)
|
||||
|
||||
// Check if it has an extension (e.g., ".png" or ".jpeg")
|
||||
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
|
||||
// If it doesn't have an extension, add one
|
||||
base_name += ".jpg"
|
||||
}
|
||||
return string(u.Handle) + "_banner_" + base_name
|
||||
// Check if it has an extension (e.g., ".png" or ".jpeg")
|
||||
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
|
||||
// If it doesn't have an extension, add one
|
||||
base_name += ".jpg"
|
||||
}
|
||||
return string(u.Handle) + "_banner_" + base_name
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the URL where we would expect to find a User's tiny profile image
|
||||
*/
|
||||
func (u User) GetTinyProfileImageUrl() string {
|
||||
// If profile image is empty, then just use the default profile image
|
||||
if u.ProfileImageUrl == "" {
|
||||
return DEFAULT_PROFILE_IMAGE_URL
|
||||
}
|
||||
// If profile image is empty, then just use the default profile image
|
||||
if u.ProfileImageUrl == "" {
|
||||
return DEFAULT_PROFILE_IMAGE_URL
|
||||
}
|
||||
|
||||
// Check that the format is as expected
|
||||
r := regexp.MustCompile(`(\.\w{2,4})$`)
|
||||
if !r.MatchString(u.ProfileImageUrl) {
|
||||
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
|
||||
}
|
||||
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
|
||||
// Check that the format is as expected
|
||||
r := regexp.MustCompile(`(\.\w{2,4})$`)
|
||||
if !r.MatchString(u.ProfileImageUrl) {
|
||||
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
|
||||
}
|
||||
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
|
||||
}
|
||||
|
||||
/**
|
||||
@ -217,8 +215,8 @@ func (u User) GetTinyProfileImageUrl() string {
|
||||
* If user has a blank or default profile image, return a non-personalized default path.
|
||||
*/
|
||||
func (u User) GetTinyProfileImageLocalPath() string {
|
||||
if u.ProfileImageUrl == "" {
|
||||
return path.Base(u.GetTinyProfileImageUrl())
|
||||
}
|
||||
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
|
||||
if u.ProfileImageUrl == "" {
|
||||
return path.Base(u.GetTinyProfileImageUrl())
|
||||
}
|
||||
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
/**
|
||||
@ -33,7 +33,6 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
|
||||
return ParseTweetResponse(tweet_response)
|
||||
}
|
||||
|
||||
|
||||
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
||||
api := API{}
|
||||
api_response, err := api.GetGraphqlFeedFor(user_id, "")
|
||||
|
@ -1,14 +1,14 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"net/http"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/jarcoal/httpmock"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
@ -31,7 +31,7 @@ func TestParseSingleUser(t *testing.T) {
|
||||
assert.Equal(UserID(44067298), user.ID)
|
||||
assert.Equal("Michael Malice", user.DisplayName)
|
||||
assert.Equal(UserHandle("michaelmalice"), user.Handle)
|
||||
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by " +
|
||||
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+
|
||||
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
|
||||
assert.Equal(941, user.FollowingCount)
|
||||
assert.Equal(208589, user.FollowersCount)
|
||||
@ -39,7 +39,7 @@ func TestParseSingleUser(t *testing.T) {
|
||||
assert.Equal("https://amzn.to/3oInafv", user.Website)
|
||||
assert.Equal(int64(1243920952), user.JoinDate.Unix())
|
||||
assert.False(user.IsPrivate)
|
||||
assert.True (user.IsVerified)
|
||||
assert.True(user.IsVerified)
|
||||
assert.False(user.IsBanned)
|
||||
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
|
||||
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
|
||||
@ -90,7 +90,7 @@ func TestParseDeletedUser(t *testing.T) {
|
||||
handle := "Some Random Deleted User"
|
||||
|
||||
apiUser := user_resp.ConvertToAPIUser()
|
||||
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
|
||||
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
|
||||
|
||||
user, err := ParseSingleUser(apiUser)
|
||||
require.NoError(t, err)
|
||||
|
@ -1,9 +1,9 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"path"
|
||||
"fmt"
|
||||
"path"
|
||||
"sort"
|
||||
)
|
||||
|
||||
type VideoID int64
|
||||
@ -12,61 +12,61 @@ type VideoID int64
|
||||
// from someone else).
|
||||
|
||||
type Video struct {
|
||||
ID VideoID
|
||||
TweetID TweetID
|
||||
Width int
|
||||
Height int
|
||||
RemoteURL string
|
||||
LocalFilename string
|
||||
ID VideoID
|
||||
TweetID TweetID
|
||||
Width int
|
||||
Height int
|
||||
RemoteURL string
|
||||
LocalFilename string
|
||||
|
||||
ThumbnailRemoteUrl string
|
||||
ThumbnailLocalPath string `db:"thumbnail_local_filename"`
|
||||
Duration int // milliseconds
|
||||
ViewCount int
|
||||
ThumbnailRemoteUrl string
|
||||
ThumbnailLocalPath string `db:"thumbnail_local_filename"`
|
||||
Duration int // milliseconds
|
||||
ViewCount int
|
||||
|
||||
IsDownloaded bool
|
||||
IsGif bool
|
||||
IsDownloaded bool
|
||||
IsGif bool
|
||||
}
|
||||
|
||||
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||
variants := apiVideo.VideoInfo.Variants
|
||||
sort.Sort(variants)
|
||||
variants := apiVideo.VideoInfo.Variants
|
||||
sort.Sort(variants)
|
||||
|
||||
var view_count int
|
||||
var view_count int
|
||||
|
||||
r := apiVideo.Ext.MediaStats.R
|
||||
r := apiVideo.Ext.MediaStats.R
|
||||
|
||||
switch r.(type) {
|
||||
case string:
|
||||
view_count = 0
|
||||
case map[string]interface{}:
|
||||
OK_entry, ok := r.(map[string]interface{})["ok"]
|
||||
if !ok {
|
||||
panic("No 'ok' value found in the R!")
|
||||
}
|
||||
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
||||
view_count = int_or_panic(view_count_str.(string))
|
||||
if !ok {
|
||||
panic("No 'viewCount' value found in the OK!")
|
||||
}
|
||||
}
|
||||
switch r.(type) {
|
||||
case string:
|
||||
view_count = 0
|
||||
case map[string]interface{}:
|
||||
OK_entry, ok := r.(map[string]interface{})["ok"]
|
||||
if !ok {
|
||||
panic("No 'ok' value found in the R!")
|
||||
}
|
||||
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
||||
view_count = int_or_panic(view_count_str.(string))
|
||||
if !ok {
|
||||
panic("No 'viewCount' value found in the OK!")
|
||||
}
|
||||
}
|
||||
|
||||
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
|
||||
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
|
||||
|
||||
return Video{
|
||||
ID: VideoID(apiVideo.ID),
|
||||
TweetID: tweet_id,
|
||||
Width: apiVideo.OriginalInfo.Width,
|
||||
Height: apiVideo.OriginalInfo.Height,
|
||||
RemoteURL: variants[0].URL,
|
||||
LocalFilename: local_filename,
|
||||
return Video{
|
||||
ID: VideoID(apiVideo.ID),
|
||||
TweetID: tweet_id,
|
||||
Width: apiVideo.OriginalInfo.Width,
|
||||
Height: apiVideo.OriginalInfo.Height,
|
||||
RemoteURL: variants[0].URL,
|
||||
LocalFilename: local_filename,
|
||||
|
||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
||||
Duration: apiVideo.VideoInfo.Duration,
|
||||
ViewCount: view_count,
|
||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
||||
Duration: apiVideo.VideoInfo.Duration,
|
||||
ViewCount: view_count,
|
||||
|
||||
IsDownloaded: false,
|
||||
IsGif: apiVideo.Type == "animated_gif",
|
||||
}
|
||||
IsDownloaded: false,
|
||||
IsGif: apiVideo.Type == "animated_gif",
|
||||
}
|
||||
}
|
||||
|
@ -1,37 +1,37 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"encoding/json"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "offline_twitter/scraper"
|
||||
. "offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestParseAPIVideo(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apivideo APIExtendedMedia
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(t, err)
|
||||
assert := assert.New(t)
|
||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apivideo APIExtendedMedia
|
||||
err = json.Unmarshal(data, &apivideo)
|
||||
require.NoError(t, err)
|
||||
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||
assert.Equal(tweet_id, video.TweetID)
|
||||
assert.Equal(1280, video.Height)
|
||||
assert.Equal(720, video.Width)
|
||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||
assert.Equal("28.mp4", video.LocalFilename)
|
||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
||||
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
||||
assert.Equal(275952, video.ViewCount)
|
||||
assert.Equal(88300, video.Duration)
|
||||
assert.False(video.IsDownloaded)
|
||||
tweet_id := TweetID(28)
|
||||
video := ParseAPIVideo(apivideo, tweet_id)
|
||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||
assert.Equal(tweet_id, video.TweetID)
|
||||
assert.Equal(1280, video.Height)
|
||||
assert.Equal(720, video.Width)
|
||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||
assert.Equal("28.mp4", video.LocalFilename)
|
||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
||||
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
||||
assert.Equal(275952, video.ViewCount)
|
||||
assert.Equal(88300, video.Duration)
|
||||
assert.False(video.IsDownloaded)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user