From d1d80a91cdaebdaa0165110d64a60a66416402e5 Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 13 Mar 2022 17:09:43 -0700 Subject: [PATCH] Add 'gofmt' linter --- .golangci.yaml | 7 +- persistence/user_queries_test.go | 4 +- persistence/utils.go | 8 +- persistence/utils_test.go | 42 ++--- persistence/versions.go | 32 ++-- persistence/versions_test.go | 5 +- scraper/api_errors.go | 8 +- scraper/api_types.go | 130 +++++++------- scraper/api_types_test.go | 27 ++- scraper/api_types_v2.go | 61 +++---- scraper/api_types_v2_test.go | 28 ++- scraper/image.go | 34 ++-- scraper/image_test.go | 42 ++--- scraper/link_expander.go | 5 +- scraper/link_expander_test.go | 3 +- scraper/poll.go | 112 ++++++------ scraper/poll_test.go | 96 +++++----- scraper/retweet.go | 12 +- scraper/retweet_test.go | 4 +- scraper/search.go | 2 +- scraper/tweet.go | 52 +++--- scraper/tweet_test.go | 18 +- scraper/tweet_trove.go | 8 +- scraper/url.go | 26 +-- scraper/url_test.go | 222 +++++++++++------------ scraper/user.go | 292 +++++++++++++++---------------- scraper/user_feed.go | 3 +- scraper/user_test.go | 14 +- scraper/video.go | 96 +++++----- scraper/video_test.go | 54 +++--- 30 files changed, 714 insertions(+), 733 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 38ebfff..0c541e6 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -27,6 +27,7 @@ linters: - wrapcheck - lll - godox + - gofmt - errorlint - nolintlint @@ -203,9 +204,9 @@ linters-settings: keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting - XXX -# gofmt: -# # simplify code: gofmt with `-s` option, true by default -# simplify: true + gofmt: + # simplify code: gofmt with `-s` option, true by default + simplify: true # gofumpt: # # Select the Go version to target. The default is `1.15`. diff --git a/persistence/user_queries_test.go b/persistence/user_queries_test.go index 1f8fef3..2439a23 100644 --- a/persistence/user_queries_test.go +++ b/persistence/user_queries_test.go @@ -6,9 +6,9 @@ import ( "fmt" "math/rand" + "github.com/go-test/deep" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/go-test/deep" "offline_twitter/scraper" ) @@ -76,7 +76,7 @@ func TestModifyUser(t *testing.T) { fake_user.FollowersCount = 2000 fake_user.JoinDate = scraper.TimestampFromUnix(2000) fake_user.ProfileImageUrl = "asdf2" - fake_user.IsContentDownloaded = false // test No Worsening + fake_user.IsContentDownloaded = false // test No Worsening // Save the modified user err = profile.SaveUser(&fake_user) diff --git a/persistence/utils.go b/persistence/utils.go index 44a8199..ff2f067 100644 --- a/persistence/utils.go +++ b/persistence/utils.go @@ -3,9 +3,9 @@ package persistence import ( "errors" "fmt" + "os" "regexp" "strings" - "os" ) var NotInDatabase = errors.New("Not in database") @@ -35,7 +35,7 @@ func file_exists(path string) bool { * https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250 */ func ToSnakeCase(str string) string { - snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}") - snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}") - return strings.ToLower(snake) + snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}") + snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}") + return strings.ToLower(snake) } diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 42e9728..2a0fa50 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -70,13 +70,13 @@ func create_stable_user() scraper.User { func create_image_from_id(id int) scraper.Image { filename := fmt.Sprintf("image%d.jpg", id) return scraper.Image{ - ID: scraper.ImageID(id), - TweetID: -1, - Width: id * 10, - Height: id * 5, - RemoteURL: filename, + ID: scraper.ImageID(id), + TweetID: -1, + Width: id * 10, + Height: id * 5, + RemoteURL: filename, LocalFilename: filename, - IsDownloaded: false, + IsDownloaded: false, } } @@ -86,18 +86,18 @@ func create_image_from_id(id int) scraper.Image { func create_video_from_id(id int) scraper.Video { filename := fmt.Sprintf("video%d.jpg", id) return scraper.Video{ - ID: scraper.VideoID(id), - TweetID: -1, - Width: id * 10, - Height: id * 5, - RemoteURL: filename, - LocalFilename: filename, + ID: scraper.VideoID(id), + TweetID: -1, + Width: id * 10, + Height: id * 5, + RemoteURL: filename, + LocalFilename: filename, ThumbnailRemoteUrl: filename, ThumbnailLocalPath: filename, - Duration: 10000, - ViewCount: 200, - IsDownloaded: false, - IsGif: false, + Duration: 10000, + ViewCount: 200, + IsDownloaded: false, + IsGif: false, } } @@ -265,13 +265,13 @@ func create_dummy_tombstone() scraper.Tweet { tweet_id := scraper.TweetID(rand.Int()) return scraper.Tweet{ - ID: tweet_id, - UserID: -1, + ID: tweet_id, + UserID: -1, TombstoneType: "deleted", - IsStub: true, - Mentions: []scraper.UserHandle{}, + IsStub: true, + Mentions: []scraper.UserHandle{}, ReplyMentions: []scraper.UserHandle{}, - Hashtags: []string{}, + Hashtags: []string{}, } } diff --git a/persistence/versions.go b/persistence/versions.go index b9515b0..718cd2d 100644 --- a/persistence/versions.go +++ b/persistence/versions.go @@ -7,29 +7,27 @@ import ( "offline_twitter/terminal_utils" ) - const ENGINE_DATABASE_VERSION = 11 - type VersionMismatchError struct { - EngineVersion int + EngineVersion int DatabaseVersion int } + func (e VersionMismatchError) Error() string { return fmt.Sprintf( -`This profile was created with database schema version %d, which is newer than this application's database schema version, %d. + `This profile was created with database schema version %d, which is newer than this application's database schema version, %d. Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`, - e.DatabaseVersion, e.EngineVersion, + e.DatabaseVersion, e.EngineVersion, ) } - /** * The Nth entry is the migration that moves you from version N to version N+1. * `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`. */ var MIGRATIONS = []string{ -`create table polls (rowid integer primary key, + `create table polls (rowid integer primary key, id integer unique not null check(typeof(id) = 'integer'), tweet_id integer not null, num_choices integer not null, @@ -50,25 +48,25 @@ var MIGRATIONS = []string{ foreign key(tweet_id) references tweets(id) );`, -`alter table tweets add column is_conversation_scraped boolean default 0; + `alter table tweets add column is_conversation_scraped boolean default 0; alter table tweets add column last_scraped_at integer not null default 0`, -`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2; + `update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2; insert into tombstone_types (rowid, short_name, tombstone_text) values (5, 'violated', 'This Tweet violated the Twitter Rules'), (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, -`alter table videos add column thumbnail_remote_url text not null default "missing"; + `alter table videos add column thumbnail_remote_url text not null default "missing"; alter table videos add column thumbnail_local_filename text not null default "missing"`, -`alter table videos add column duration integer not null default 0; + `alter table videos add column duration integer not null default 0; alter table videos add column view_count integer not null default 0`, -`alter table users add column is_banned boolean default 0`, -`alter table urls add column short_text text not null default ""`, -`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. ' + `alter table users add column is_banned boolean default 0`, + `alter table urls add column short_text text not null default ""`, + `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. ' || 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`, -`alter table users add column is_followed boolean default 0`, -`create table fake_user_sequence(latest_fake_id integer not null); + `alter table users add column is_followed boolean default 0`, + `create table fake_user_sequence(latest_fake_id integer not null); insert into fake_user_sequence values(0x4000000000000000); alter table users add column is_id_fake boolean default 0;`, -`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like + `delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like 'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`, } diff --git a/persistence/versions_test.go b/persistence/versions_test.go index 7410d37..728b639 100644 --- a/persistence/versions_test.go +++ b/persistence/versions_test.go @@ -2,12 +2,13 @@ package persistence_test import ( "testing" + "os" "github.com/stretchr/testify/require" - "offline_twitter/scraper" "offline_twitter/persistence" + "offline_twitter/scraper" ) func TestVersionUpgrade(t *testing.T) { @@ -25,7 +26,7 @@ func TestVersionUpgrade(t *testing.T) { require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet") persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration) - err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION + 1) + err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION+1) require.NoError(err) require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't") diff --git a/scraper/api_errors.go b/scraper/api_errors.go index cd69a67..e6692e5 100644 --- a/scraper/api_errors.go +++ b/scraper/api_errors.go @@ -5,8 +5,8 @@ import ( ) var ( - END_OF_FEED = fmt.Errorf("End of feed") - DOESNT_EXIST = fmt.Errorf("Doesn't exist") - EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API") - API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API") + END_OF_FEED = fmt.Errorf("End of feed") + DOESNT_EXIST = fmt.Errorf("Doesn't exist") + EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API") + API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API") ) diff --git a/scraper/api_types.go b/scraper/api_types.go index ccd780f..0c49a97 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -1,33 +1,33 @@ package scraper import ( + "encoding/json" "fmt" "html" - "time" - "strings" - "encoding/json" - "strconv" "sort" + "strconv" + "strings" + "time" ) - type APIMedia struct { ID int64 `json:"id_str,string"` MediaURLHttps string `json:"media_url_https"` Type string `json:"type"` URL string `json:"url"` - OriginalInfo struct { - Width int `json:"width"` - Height int `json:"height"` + OriginalInfo struct { + Width int `json:"width"` + Height int `json:"height"` } `json:"original_info"` } type SortableVariants []struct { - Bitrate int `json:"bitrate,omitempty"` - URL string `json:"url"` + Bitrate int `json:"bitrate,omitempty"` + URL string `json:"url"` } -func (v SortableVariants) Len() int { return len(v) } -func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] } + +func (v SortableVariants) Len() int { return len(v) } +func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] } func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate } type APIExtendedMedia struct { @@ -35,12 +35,12 @@ type APIExtendedMedia struct { MediaURLHttps string `json:"media_url_https"` Type string `json:"type"` VideoInfo struct { - Variants SortableVariants `json:"variants"` - Duration int `json:"duration_millis"` + Variants SortableVariants `json:"variants"` + Duration int `json:"duration_millis"` } `json:"video_info"` OriginalInfo struct { - Width int `json:"width"` - Height int `json:"height"` + Width int `json:"width"` + Height int `json:"height"` } `json:"original_info"` Ext struct { MediaStats struct { @@ -74,9 +74,9 @@ type APICard struct { } `json:"description"` Thumbnail struct { ImageValue struct { - Url string `json:"url"` - Width int `json:"width"` - Height int `json:"height"` + Url string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` } `json:"image_value"` } `json:"thumbnail_image_large"` PlayerImage struct { @@ -128,18 +128,18 @@ type APICard struct { } type APITweet struct { - ID int64 `json:"id_str,string"` - ConversationID int64 `json:"conversation_id_str,string"` - CreatedAt string `json:"created_at"` - FavoriteCount int `json:"favorite_count"` - FullText string `json:"full_text"` - DisplayTextRange []int `json:"display_text_range"` - Entities struct { + ID int64 `json:"id_str,string"` + ConversationID int64 `json:"conversation_id_str,string"` + CreatedAt string `json:"created_at"` + FavoriteCount int `json:"favorite_count"` + FullText string `json:"full_text"` + DisplayTextRange []int `json:"display_text_range"` + Entities struct { Hashtags []struct { Text string `json:"text"` } `json:"hashtags"` Media []APIMedia `json:"media"` - URLs []struct { + URLs []struct { ExpandedURL string `json:"expanded_url"` ShortenedUrl string `json:"url"` } `json:"urls"` @@ -147,30 +147,30 @@ type APITweet struct { UserName string `json:"screen_name"` UserID int64 `json:"id_str,string"` } `json:"user_mentions"` - ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange" + ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange" } `json:"entities"` ExtendedEntities struct { Media []APIExtendedMedia `json:"media"` } `json:"extended_entities"` - InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"` - InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"` - InReplyToScreenName string `json:"in_reply_to_screen_name"` - ReplyCount int `json:"reply_count"` - RetweetCount int `json:"retweet_count"` - QuoteCount int `json:"quote_count"` - RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string + InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"` + InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"` + InReplyToScreenName string `json:"in_reply_to_screen_name"` + ReplyCount int `json:"reply_count"` + RetweetCount int `json:"retweet_count"` + QuoteCount int `json:"quote_count"` + RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string RetweetedStatusID int64 - QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string + QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string QuotedStatusID int64 QuotedStatusPermalink struct { ShortURL string `json:"url"` ExpandedURL string `json:"expanded"` } `json:"quoted_status_permalink"` - Time time.Time `json:"time"` - UserID int64 `json:"user_id_str,string"` - UserHandle string - Card APICard `json:"card"` - TombstoneText string + Time time.Time `json:"time"` + UserID int64 `json:"user_id_str,string"` + UserHandle string + Card APICard `json:"card"` + TombstoneText string } func (t *APITweet) NormalizeContent() { @@ -183,7 +183,7 @@ func (t *APITweet) NormalizeContent() { t.RetweetedStatusID = int64(id) } - if (len(t.DisplayTextRange) == 2) { + if len(t.DisplayTextRange) == 2 { t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]])) t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]]) } @@ -217,7 +217,6 @@ func (t APITweet) String() string { return string(data) } - type APIUser struct { CreatedAt string `json:"created_at"` Description string `json:"description"` @@ -235,7 +234,7 @@ type APIUser struct { ListedCount int `json:"listed_count"` Name string `json:"name"` Location string `json:"location"` - PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array + PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array ProfileBannerURL string `json:"profile_banner_url"` ProfileImageURLHTTPS string `json:"profile_image_url_https"` Protected bool `json:"protected"` @@ -246,7 +245,6 @@ type APIUser struct { DoesntExist bool } - type UserResponse struct { Data struct { User struct { @@ -255,11 +253,12 @@ type UserResponse struct { } `json:"user"` } `json:"data"` Errors []struct { - Message string `json:"message"` - Name string `json:"name"` - Code int `json:"code"` + Message string `json:"message"` + Name string `json:"name"` + Code int `json:"code"` } `json:"errors"` } + func (u UserResponse) ConvertToAPIUser() APIUser { ret := u.Data.User.Legacy ret.ID = u.Data.User.ID @@ -279,9 +278,9 @@ func (u UserResponse) ConvertToAPIUser() APIUser { } type Entry struct { - EntryID string `json:"entryId"` - SortIndex int64 `json:"sortIndex,string"` - Content struct { + EntryID string `json:"entryId"` + SortIndex int64 `json:"sortIndex,string"` + Content struct { Item struct { Content struct { Tombstone struct { @@ -303,12 +302,15 @@ type Entry struct { } `json:"operation"` } `json:"content"` } + func (e Entry) GetTombstoneText() string { return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text } + type SortableEntries []Entry -func (e SortableEntries) Len() int { return len(e) } -func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } + +func (e SortableEntries) Len() int { return len(e) } +func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex } type TweetResponse struct { @@ -329,15 +331,16 @@ type TweetResponse struct { } var tombstone_types = map[string]string{ - "This Tweet was deleted by the Tweet author. Learn more": "deleted", - "This Tweet is from a suspended account. Learn more": "suspended", + "This Tweet was deleted by the Tweet author. Learn more": "deleted", + "This Tweet is from a suspended account. Learn more": "suspended", "You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden", - "This Tweet is unavailable. Learn more": "unavailable", - "This Tweet violated the Twitter Rules. Learn more": "violated", - "This Tweet is from an account that no longer exists. Learn more": "no longer exists", + "This Tweet is unavailable. Learn more": "unavailable", + "This Tweet violated the Twitter Rules. Learn more": "violated", + "This Tweet is from an account that no longer exists. Learn more": "no longer exists", "Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " + "you’ll need to log in to Twitter. Learn more": "age-restricted", } + /** * Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to * be fetched for tombstones. @@ -379,8 +382,8 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { if entry.GetTombstoneText() != "" { // Try to reconstruct the tombstone tweet var tombstoned_tweet APITweet - tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones - if i + 1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 { + tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones + if i+1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 { next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)] if !ok { @@ -390,7 +393,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { tombstoned_tweet.UserID = api_tweet.InReplyToUserID ret = append(ret, UserHandle(api_tweet.InReplyToScreenName)) } - if i - 1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 { + if i-1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 { prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID _, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)] if !ok { @@ -416,7 +419,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle { func (t *TweetResponse) GetCursor() string { entries := t.Timeline.Instructions[0].AddEntries.Entries if len(entries) > 0 { - last_entry := entries[len(entries) - 1] + last_entry := entries[len(entries)-1] if strings.Contains(last_entry.EntryID, "cursor") { return last_entry.Content.Operation.Cursor.Value } @@ -424,7 +427,7 @@ func (t *TweetResponse) GetCursor() string { // Next, try the other format ("replaceEntry") instructions := t.Timeline.Instructions - last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry + last_replace_entry := instructions[len(instructions)-1].ReplaceEntry.Entry if strings.Contains(last_replace_entry.EntryID, "cursor") { return last_replace_entry.Content.Operation.Cursor.Value } @@ -450,7 +453,6 @@ func (t *TweetResponse) IsEndOfFeed() bool { return true } - func idstr_to_int(idstr string) int64 { id, err := strconv.Atoi(idstr) if err != nil { diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go index 10e8bf6..3408ca0 100644 --- a/scraper/api_types_test.go +++ b/scraper/api_types_test.go @@ -1,9 +1,9 @@ package scraper_test import ( - "testing" - "os" "encoding/json" + "os" + "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -11,17 +11,16 @@ import ( . "offline_twitter/scraper" ) - func TestNormalizeContent(t *testing.T) { assert := assert.New(t) test_cases := []struct { - filename string - eventual_full_text string - quoted_status_id TweetID - in_reply_to_id TweetID + filename string + eventual_full_text string + quoted_status_id TweetID + in_reply_to_id TweetID retweeted_status_id TweetID - reply_mentions string - } { + reply_mentions string + }{ {"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"}, {"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""}, {"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"}, @@ -48,7 +47,7 @@ func TestNormalizeContent(t *testing.T) { } var tweet APITweet err = json.Unmarshal(data, &tweet) - assert.NoError(err, "Failed at " + v.filename) + assert.NoError(err, "Failed at "+v.filename) tweet.NormalizeContent() @@ -60,7 +59,6 @@ func TestNormalizeContent(t *testing.T) { } } - func TestUserProfileToAPIUser(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/michael_malice_user_profile.json") @@ -76,7 +74,6 @@ func TestUserProfileToAPIUser(t *testing.T) { assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) } - func TestGetCursor(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json") @@ -91,13 +88,12 @@ func TestGetCursor(t *testing.T) { tweet_resp.GetCursor()) } - func TestIsEndOfFeed(t *testing.T) { assert := assert.New(t) test_cases := []struct { - filename string + filename string is_end_of_feed bool - } { + }{ {"test_responses/michael_malice_feed.json", false}, {"test_responses/kwiber_end_of_feed.json", true}, } @@ -113,7 +109,6 @@ func TestIsEndOfFeed(t *testing.T) { } } - func TestHandleTombstonesHidden(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json") diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index 7ec0b9c..7880a62 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -13,13 +13,13 @@ import ( ) type CardValue struct { - Type string `json:"type"` + Type string `json:"type"` StringValue string `json:"string_value"` - ImageValue struct { + ImageValue struct { AltText string `json:"alt"` - Height int `json:"height"` - Width int `json:"width"` - Url string `json:"url"` + Height int `json:"height"` + Width int `json:"width"` + Url string `json:"url"` } `json:"image_value"` UserValue struct { ID int64 `json:"id_str,string"` @@ -30,13 +30,14 @@ type CardValue struct { type APIV2Card struct { Legacy struct { BindingValues []struct { - Key string `json:"key"` + Key string `json:"key"` Value CardValue `json:"value"` } `json:"binding_values"` Name string `json:"name"` - Url string `json:"url"` + Url string `json:"url"` } `json:"legacy"` } + func (card APIV2Card) ParseAsUrl() Url { values := make(map[string]CardValue) for _, obj := range card.Legacy.BindingValues { @@ -121,6 +122,7 @@ type APIV2UserResult struct { } `json:"result"` } `json:"user_results"` } + func (u APIV2UserResult) ToUser() User { user, err := ParseSingleUser(u.UserResults.Result.Legacy) if err != nil { @@ -131,16 +133,16 @@ func (u APIV2UserResult) ToUser() User { } type _Result struct { - ID int64 `json:"rest_id,string"` - Legacy APIV2Tweet `json:"legacy"` + ID int64 `json:"rest_id,string"` + Legacy APIV2Tweet `json:"legacy"` Tombstone *struct { Text struct { Text string `json:"text"` } `json:"text"` } `json:"tombstone"` - Core *APIV2UserResult `json:"core"` - Card APIV2Card `json:"card"` - QuotedStatusResult *APIV2Result `json:"quoted_status_result"` + Core *APIV2UserResult `json:"core"` + Card APIV2Card `json:"card"` + QuotedStatusResult *APIV2Result `json:"quoted_status_result"` } type APIV2Result struct { @@ -149,11 +151,12 @@ type APIV2Result struct { Tweet _Result `json:"tweet"` } `json:"result"` } + func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove { ret := NewTweetTrove() // Start by checking if this is a null entry in a feed - if api_result.Result.Tombstone != nil && ignore_null_entries{ + if api_result.Result.Tombstone != nil && ignore_null_entries { // TODO: this is becoming really spaghetti. Why do we need a separate execution path for this? return ret } @@ -221,7 +224,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove continue } found = true - url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API + url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API main_tweet.Urls[i] = url } if !found { @@ -245,6 +248,7 @@ type APIV2Tweet struct { RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"` APITweet } + func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove { ret := NewTweetTrove() @@ -253,7 +257,6 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove { orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false) ret.MergeWith(orig_tweet_trove) - retweet := Retweet{} var err error retweet.RetweetID = TweetID(api_v2_tweet.ID) @@ -277,25 +280,24 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove { } type APIV2Entry struct { - EntryID string `json:"entryId"` - SortIndex int64 `json:"sortIndex,string"` - Content struct { + EntryID string `json:"entryId"` + SortIndex int64 `json:"sortIndex,string"` + Content struct { ItemContent struct { - EntryType string `json:"entryType"` + EntryType string `json:"entryType"` TweetResults APIV2Result `json:"tweet_results"` } `json:"itemContent"` // Cursors - EntryType string `json:"entryType"` - Value string `json:"value"` + EntryType string `json:"entryType"` + Value string `json:"value"` CursorType string `json:"cursorType"` - } `json:"content"` } type APIV2Instruction struct { - Type string `json:"type"` - Entries []APIV2Entry`json:"entries"` + Type string `json:"type"` + Entries []APIV2Entry `json:"entries"` } type APIV2Response struct { @@ -324,7 +326,7 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction { func (api_response APIV2Response) GetCursorBottom() string { entries := api_response.GetMainInstruction().Entries - last_entry := entries[len(entries) - 1] + last_entry := entries[len(entries)-1] if last_entry.Content.CursorType != "Bottom" { panic("No bottom cursor found") } @@ -349,7 +351,7 @@ func (api_response APIV2Response) IsEmpty() bool { */ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { ret := NewTweetTrove() - for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet + for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet if !strings.HasPrefix(entry.EntryID, "tweet-") { continue } @@ -363,12 +365,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { return ret, nil } - func get_graphql_user_timeline_url(user_id UserID, cursor string) string { if cursor != "" { - return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on + return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on } - return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on + return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on } /** @@ -446,7 +447,7 @@ func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Respo } if fresh_response.IsEmpty() { // Response has a pinned tweet, but no other content: end of feed has been reached - return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol + return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol } last_response = &fresh_response diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index 7dc2697..3d633e5 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -1,10 +1,10 @@ package scraper_test import ( - "testing" - "os" "encoding/json" "fmt" + "os" + "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -34,7 +34,7 @@ func TestAPIV2ParseUser(t *testing.T) { assert.Equal(user.ID, UserID(44067298)) assert.Equal(user.DisplayName, "Michael Malice") assert.Equal(user.Handle, UserHandle("michaelmalice")) - assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & " + + assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & "+ "Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model") assert.Equal(user.FollowingCount, 964) assert.Equal(user.FollowersCount, 334571) @@ -70,7 +70,7 @@ func TestAPIV2ParseTweet(t *testing.T) { assert.True(ok) assert.Equal(tweet.ID, TweetID(1485708879174508550)) assert.Equal(tweet.UserID, UserID(44067298)) - assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a " + + assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a "+ "row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964") assert.Equal(tweet.PostedAt.Unix(), int64(1643055574)) assert.Equal(tweet.QuotedTweetID, TweetID(0)) @@ -133,7 +133,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) { assert.True(ok) assert.Equal(TweetID(1485690410899021826), quote_tweet.ID) assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID) - assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, " + + assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, "+ "for example", quote_tweet.Text) // Should be 2 users: quoter and quoted @@ -182,7 +182,7 @@ func TestAPIV2ParseRetweet(t *testing.T) { // Check the video v := tweet.Videos[0] assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl) - assert.Equal(0, v.ViewCount) // TODO: make this work + assert.Equal(0, v.ViewCount) // TODO: make this work assert.Equal(720, v.Height) assert.Equal(720, v.Width) assert.Equal(30066, v.Duration) @@ -200,7 +200,6 @@ func TestAPIV2ParseRetweet(t *testing.T) { assert.Equal(UserID(44067298), retweeting_user.ID) assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle) - // Should be 1 retweet assert.Equal(1, len(trove.Retweets)) retweet, ok := trove.Retweets[1485699748514476037] @@ -270,7 +269,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) { assert.Equal(UserID(599817378), retweet.RetweetedByID) } - /** * Parse tweet with quoted tombstone */ @@ -300,13 +298,12 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) { assert.True(ok) assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID) assert.Equal("no longer exists", tombstoned_tweet.TombstoneType) - assert.True (tombstoned_tweet.IsStub) + assert.True(tombstoned_tweet.IsStub) assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle) assert.Equal(0, len(trove.Retweets)) } - /** * Parse a tweet with a link */ @@ -326,7 +323,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) { assert.Equal(1, len(trove.Tweets)) tweet, ok := trove.Tweets[1485695695025803264] assert.True(ok) - assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to " + + assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to "+ "show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text) assert.Equal(1, len(tweet.Urls)) @@ -335,7 +332,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) { assert.Equal("observer.com", url.Domain) assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title) assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text) - assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to " + + assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to "+ "others that one is a good person without having to do anything", url.Description) assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl) assert.Equal(600, url.ThumbnailWidth) @@ -439,10 +436,9 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) { assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix()) assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix()) - assert.Equal(1440 * 60, poll.VotingDuration) + assert.Equal(1440*60, poll.VotingDuration) } - func TestParseAPIV2UserFeed(t *testing.T) { data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json") if err != nil { @@ -495,7 +491,6 @@ func TestParseAPIV2UserFeed(t *testing.T) { fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets)) } - /** * Should correctly identify an "empty" response */ @@ -562,13 +557,12 @@ func TestAPIV2TombstoneEntry(t *testing.T) { err = json.Unmarshal(data, &tweet_result) require.NoError(t, err) - trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries + trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries assert.Len(trove.Tweets, 0) assert.Len(trove.Users, 0) assert.Len(trove.Retweets, 0) } - func TestTweetWithWarning(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json") diff --git a/scraper/image.go b/scraper/image.go index c7ed727..3e043cb 100644 --- a/scraper/image.go +++ b/scraper/image.go @@ -1,29 +1,29 @@ package scraper import ( - "path" + "path" ) type ImageID int64 type Image struct { - ID ImageID - TweetID TweetID - Width int - Height int - RemoteURL string - LocalFilename string - IsDownloaded bool + ID ImageID + TweetID TweetID + Width int + Height int + RemoteURL string + LocalFilename string + IsDownloaded bool } func ParseAPIMedia(apiMedia APIMedia) Image { - local_filename := path.Base(apiMedia.MediaURLHttps) - return Image{ - ID: ImageID(apiMedia.ID), - RemoteURL: apiMedia.MediaURLHttps, - Width: apiMedia.OriginalInfo.Width, - Height: apiMedia.OriginalInfo.Height, - LocalFilename: local_filename, - IsDownloaded: false, - } + local_filename := path.Base(apiMedia.MediaURLHttps) + return Image{ + ID: ImageID(apiMedia.ID), + RemoteURL: apiMedia.MediaURLHttps, + Width: apiMedia.OriginalInfo.Width, + Height: apiMedia.OriginalInfo.Height, + LocalFilename: local_filename, + IsDownloaded: false, + } } diff --git a/scraper/image_test.go b/scraper/image_test.go index e4ee7d4..ef4bb37 100644 --- a/scraper/image_test.go +++ b/scraper/image_test.go @@ -1,31 +1,31 @@ package scraper_test import ( - "testing" - "os" - "encoding/json" + "encoding/json" + "os" + "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - . "offline_twitter/scraper" + . "offline_twitter/scraper" ) func TestParseAPIMedia(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/image.json") - if err != nil { - panic(err) - } - var apimedia APIMedia - err = json.Unmarshal(data, &apimedia) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/image.json") + if err != nil { + panic(err) + } + var apimedia APIMedia + err = json.Unmarshal(data, &apimedia) + require.NoError(t, err) - image := ParseAPIMedia(apimedia) - assert.Equal(ImageID(1395882862289772553), image.ID) - assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL) - assert.Equal(593, image.Width) - assert.Equal(239, image.Height) - assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename) - assert.False(image.IsDownloaded) + image := ParseAPIMedia(apimedia) + assert.Equal(ImageID(1395882862289772553), image.ID) + assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL) + assert.Equal(593, image.Width) + assert.Equal(239, image.Height) + assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename) + assert.False(image.IsDownloaded) } diff --git a/scraper/link_expander.go b/scraper/link_expander.go index 97122a2..74d175a 100644 --- a/scraper/link_expander.go +++ b/scraper/link_expander.go @@ -2,11 +2,10 @@ package scraper import ( "fmt" - "time" "net/http" + "time" ) - /** * Return the expanded version of a short URL. Input must be a real short URL. */ @@ -21,7 +20,7 @@ func ExpandShortUrl(short_url string) string { resp, err := client.Get(short_url) if err != nil { - panic(err) // TODO: handle timeouts + panic(err) // TODO: handle timeouts } if resp.StatusCode != 301 { panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR)) diff --git a/scraper/link_expander_test.go b/scraper/link_expander_test.go index 68e9f7b..202ab68 100644 --- a/scraper/link_expander_test.go +++ b/scraper/link_expander_test.go @@ -6,12 +6,11 @@ import ( "net/http" "net/http/httptest" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/assert" . "offline_twitter/scraper" ) - func TestExpandShortUrl(t *testing.T) { redirecting_to := "redirect target" srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { diff --git a/scraper/poll.go b/scraper/poll.go index 6acd166..eb6de0e 100644 --- a/scraper/poll.go +++ b/scraper/poll.go @@ -1,82 +1,82 @@ package scraper import ( - "strings" - "strconv" - "net/url" + "net/url" + "strconv" + "strings" ) type PollID int64 type Poll struct { - ID PollID - TweetID TweetID - NumChoices int + ID PollID + TweetID TweetID + NumChoices int - Choice1 string - Choice1_Votes int - Choice2 string - Choice2_Votes int - Choice3 string - Choice3_Votes int - Choice4 string - Choice4_Votes int + Choice1 string + Choice1_Votes int + Choice2 string + Choice2_Votes int + Choice3 string + Choice3_Votes int + Choice4 string + Choice4_Votes int - VotingDuration int // In seconds - VotingEndsAt Timestamp + VotingDuration int // In seconds + VotingEndsAt Timestamp - LastUpdatedAt Timestamp `db:"last_scraped_at"` + LastUpdatedAt Timestamp `db:"last_scraped_at"` } func ParseAPIPoll(apiCard APICard) Poll { - card_url, err := url.Parse(apiCard.ShortenedUrl) - if err != nil { - panic(err) - } - id := int_or_panic(card_url.Hostname()) + card_url, err := url.Parse(apiCard.ShortenedUrl) + if err != nil { + panic(err) + } + id := int_or_panic(card_url.Hostname()) - ret := Poll{} - ret.ID = PollID(id) - ret.NumChoices = parse_num_choices(apiCard.Name) - ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60 - ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue) - if err != nil { - panic(err) - } - ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue) - if err != nil { - panic(err) - } + ret := Poll{} + ret.ID = PollID(id) + ret.NumChoices = parse_num_choices(apiCard.Name) + ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60 + ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue) + if err != nil { + panic(err) + } + ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue) + if err != nil { + panic(err) + } - ret.Choice1 = apiCard.BindingValues.Choice1.StringValue - ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue) - ret.Choice2 = apiCard.BindingValues.Choice2.StringValue - ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue) + ret.Choice1 = apiCard.BindingValues.Choice1.StringValue + ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue) + ret.Choice2 = apiCard.BindingValues.Choice2.StringValue + ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue) - if ret.NumChoices > 2 { - ret.Choice3 = apiCard.BindingValues.Choice3.StringValue - ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue) - } - if ret.NumChoices > 3 { - ret.Choice4 = apiCard.BindingValues.Choice4.StringValue - ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue) - } + if ret.NumChoices > 2 { + ret.Choice3 = apiCard.BindingValues.Choice3.StringValue + ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue) + } + if ret.NumChoices > 3 { + ret.Choice4 = apiCard.BindingValues.Choice4.StringValue + ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue) + } - return ret + return ret } func parse_num_choices(card_name string) int { - if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 { - panic("Not valid card name: " + card_name) - } + if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 { + panic("Not valid card name: " + card_name) + } - return int_or_panic(card_name[4:5]) + return int_or_panic(card_name[4:5]) } func int_or_panic(s string) int { - result, err := strconv.Atoi(s) - if err != nil { - panic(err) - } - return result + result, err := strconv.Atoi(s) + if err != nil { + panic(err) + } + return result } diff --git a/scraper/poll_test.go b/scraper/poll_test.go index bf3e442..ab1ee84 100644 --- a/scraper/poll_test.go +++ b/scraper/poll_test.go @@ -1,67 +1,67 @@ package scraper_test import ( - "testing" - "os" - "encoding/json" + "encoding/json" + "os" + "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - . "offline_twitter/scraper" + . "offline_twitter/scraper" ) func TestParsePoll2Choices(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - poll := ParseAPIPoll(apiCard) - assert.Equal(PollID(1457419248461131776), poll.ID) - assert.Equal(2, poll.NumChoices) - assert.Equal(60 * 60 * 24, poll.VotingDuration) - assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix()) - assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix()) + poll := ParseAPIPoll(apiCard) + assert.Equal(PollID(1457419248461131776), poll.ID) + assert.Equal(2, poll.NumChoices) + assert.Equal(60*60*24, poll.VotingDuration) + assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix()) + assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix()) - assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) - assert.Equal("Yes", poll.Choice1) - assert.Equal("No", poll.Choice2) - assert.Equal(529, poll.Choice1_Votes) - assert.Equal(2182, poll.Choice2_Votes) + assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) + assert.Equal("Yes", poll.Choice1) + assert.Equal("No", poll.Choice2) + assert.Equal(529, poll.Choice1_Votes) + assert.Equal(2182, poll.Choice2_Votes) } func TestParsePoll4Choices(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - poll := ParseAPIPoll(apiCard) - assert.Equal(PollID(1455611588854140929), poll.ID) - assert.Equal(4, poll.NumChoices) - assert.Equal(60 * 60 * 24, poll.VotingDuration) - assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix()) - assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix()) - assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) + poll := ParseAPIPoll(apiCard) + assert.Equal(PollID(1455611588854140929), poll.ID) + assert.Equal(4, poll.NumChoices) + assert.Equal(60*60*24, poll.VotingDuration) + assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix()) + assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix()) + assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) - assert.Equal("Alec Baldwin", poll.Choice1) - assert.Equal(1669, poll.Choice1_Votes) + assert.Equal("Alec Baldwin", poll.Choice1) + assert.Equal(1669, poll.Choice1_Votes) - assert.Equal("Andew Cuomo", poll.Choice2) - assert.Equal(272, poll.Choice2_Votes) + assert.Equal("Andew Cuomo", poll.Choice2) + assert.Equal(272, poll.Choice2_Votes) - assert.Equal("George Floyd", poll.Choice3) - assert.Equal(829, poll.Choice3_Votes) + assert.Equal("George Floyd", poll.Choice3) + assert.Equal(829, poll.Choice3_Votes) - assert.Equal("Derek Chauvin", poll.Choice4) - assert.Equal(2397, poll.Choice4_Votes) + assert.Equal("Derek Chauvin", poll.Choice4) + assert.Equal(2397, poll.Choice4_Votes) } diff --git a/scraper/retweet.go b/scraper/retweet.go index 39b3979..a4ca028 100644 --- a/scraper/retweet.go +++ b/scraper/retweet.go @@ -1,12 +1,12 @@ package scraper type Retweet struct { - RetweetID TweetID - TweetID TweetID - Tweet *Tweet - RetweetedByID UserID `db:"retweeted_by"` - RetweetedBy *User - RetweetedAt Timestamp + RetweetID TweetID + TweetID TweetID + Tweet *Tweet + RetweetedByID UserID `db:"retweeted_by"` + RetweetedBy *User + RetweetedAt Timestamp } func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) { diff --git a/scraper/retweet_test.go b/scraper/retweet_test.go index eb9b57e..9770c6a 100644 --- a/scraper/retweet_test.go +++ b/scraper/retweet_test.go @@ -5,8 +5,8 @@ import ( "os" "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" . "offline_twitter/scraper" ) diff --git a/scraper/search.go b/scraper/search.go index 44fe8e1..696c735 100644 --- a/scraper/search.go +++ b/scraper/search.go @@ -5,7 +5,7 @@ import ( ) func TimestampToDateString(timestamp int) string { - panic("???") // TODO + panic("???") // TODO } /** diff --git a/scraper/tweet.go b/scraper/tweet.go index 9b7c42e..0f677ef 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -1,9 +1,9 @@ package scraper import ( - "time" "fmt" "strings" + "time" "offline_twitter/terminal_utils" ) @@ -13,18 +13,18 @@ const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50 type TweetID int64 type Tweet struct { - ID TweetID - UserID UserID - UserHandle UserHandle // For processing tombstones - User *User - Text string - PostedAt Timestamp - NumLikes int - NumRetweets int - NumReplies int - NumQuoteTweets int - InReplyToID TweetID - QuotedTweetID TweetID + ID TweetID + UserID UserID + UserHandle UserHandle // For processing tombstones + User *User + Text string + PostedAt Timestamp + NumLikes int + NumRetweets int + NumReplies int + NumQuoteTweets int + InReplyToID TweetID + QuotedTweetID TweetID Images []Image Videos []Video @@ -35,14 +35,13 @@ type Tweet struct { Polls []Poll TombstoneType string - IsStub bool + IsStub bool - IsContentDownloaded bool + IsContentDownloaded bool IsConversationScraped bool - LastScrapedAt Timestamp + LastScrapedAt Timestamp } - func (t Tweet) String() string { var author string if t.User != nil { @@ -52,7 +51,7 @@ func (t Tweet) String() string { } ret := fmt.Sprintf( -`%s + `%s %s %s Replies: %d RT: %d QT: %d Likes: %d @@ -67,11 +66,11 @@ Replies: %d RT: %d QT: %d Likes: %d ) if len(t.Images) > 0 { - ret += fmt.Sprintf(terminal_utils.COLOR_GREEN + "images: %d\n" + terminal_utils.COLOR_RESET, len(t.Images)) + ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images)) } if len(t.Urls) > 0 { ret += "urls: [\n" - for _, url := range(t.Urls) { + for _, url := range t.Urls { ret += " " + url.Text + "\n" } ret += "]" @@ -90,7 +89,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { ret.Text = apiTweet.FullText // Process "posted-at" date and time - if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones + if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt) if err != nil { return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err) @@ -125,7 +124,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { // Process images for _, media := range apiTweet.Entities.Media { - if media.Type != "photo" { // TODO: remove this eventually + if media.Type != "photo" { // TODO: remove this eventually panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR)) } new_image := ParseAPIMedia(media) @@ -151,7 +150,6 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { } } - // Process videos for _, entity := range apiTweet.ExtendedEntities.Media { if entity.Type != "video" && entity.Type != "animated_gif" { @@ -175,13 +173,12 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { // Process tombstones and other metadata ret.TombstoneType = apiTweet.TombstoneText ret.IsStub = !(ret.TombstoneType == "") - ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped - ret.IsConversationScraped = false // Safe due to the "No Worsening" principle + ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped + ret.IsConversationScraped = false // Safe due to the "No Worsening" principle return } - /** * Get a single tweet with no replies from the API. * @@ -206,7 +203,6 @@ func GetTweet(id TweetID) (Tweet, error) { return ParseSingleTweet(single_tweet) } - /** * Return a list of tweets, including the original and the rest of its thread, * along with a list of associated users. @@ -227,7 +223,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) { return } if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD && - tweet_response.GetCursor() != "" { + tweet_response.GetCursor() != "" { err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD) if err != nil { err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go index 8491494..133ae53 100644 --- a/scraper/tweet_test.go +++ b/scraper/tweet_test.go @@ -5,13 +5,13 @@ import ( "os" "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" . "offline_twitter/scraper" ) -func load_tweet_from_file(filename string) Tweet{ +func load_tweet_from_file(filename string) Tweet { data, err := os.ReadFile(filename) if err != nil { panic(err) @@ -28,12 +28,11 @@ func load_tweet_from_file(filename string) Tweet{ return tweet } - func TestParseSingleTweet(t *testing.T) { assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json") - assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the " + + assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the "+ "largest white pill I’ve swallowed in years.", tweet.Text) assert.Len(tweet.Mentions, 1) assert.Contains(tweet.Mentions, UserHandle("michaelmalice")) @@ -73,7 +72,7 @@ func TestParseTweetWithQuotedTweetAndLink(t *testing.T) { assert := assert.New(t) tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json") - assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the " + + assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the "+ "weasels and rats running American industry today?", tweet.Text) assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID) @@ -135,7 +134,7 @@ func TestParseTweetWithMultipleUrls(t *testing.T) { assert.False(tweet.Urls[0].HasCard) assert.False(tweet.Urls[1].HasCard) - assert.True (tweet.Urls[2].HasCard) + assert.True(tweet.Urls[2].HasCard) assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title) } @@ -166,12 +165,11 @@ func TestTweetWithPoll(t *testing.T) { assert.Equal(624, p.Choice2_Votes) assert.Equal(778, p.Choice3_Votes) assert.Equal(1138, p.Choice4_Votes) - assert.Equal(1440 * 60, p.VotingDuration) + assert.Equal(1440*60, p.VotingDuration) assert.Equal(int64(1638331934), p.VotingEndsAt.Unix()) assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix()) } - func TestParseTweetResponse(t *testing.T) { assert := assert.New(t) data, err := os.ReadFile("test_responses/michael_malice_feed.json") @@ -186,7 +184,7 @@ func TestParseTweetResponse(t *testing.T) { require.NoError(t, err) tweets, retweets, users := trove.Transform() - assert.Len(tweets, 29 - 3) + assert.Len(tweets, 29-3) assert.Len(retweets, 3) assert.Len(users, 9) } diff --git a/scraper/tweet_trove.go b/scraper/tweet_trove.go index 749ccc1..741e79e 100644 --- a/scraper/tweet_trove.go +++ b/scraper/tweet_trove.go @@ -8,9 +8,9 @@ import ( ) type TweetTrove struct { - Tweets map[TweetID]Tweet - Users map[UserID]User - Retweets map[TweetID]Retweet + Tweets map[TweetID]Tweet + Users map[UserID]User + Retweets map[TweetID]Retweet TombstoneUsers []UserHandle } @@ -38,7 +38,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [ retweets = append(retweets, val) } return -} // TODO: refactor until this function isn't needed anymore +} // TODO: refactor until this function isn't needed anymore /** * Search for a user by handle. Second param is whether the user was found or not. diff --git a/scraper/url.go b/scraper/url.go index 71b3d65..1a933d6 100644 --- a/scraper/url.go +++ b/scraper/url.go @@ -2,28 +2,28 @@ package scraper import ( "fmt" + "net/url" "path" "regexp" - "net/url" ) type Url struct { TweetID TweetID - Domain string - Text string - ShortText string - Title string - Description string - ThumbnailWidth int - ThumbnailHeight int + Domain string + Text string + ShortText string + Title string + Description string + ThumbnailWidth int + ThumbnailHeight int ThumbnailRemoteUrl string ThumbnailLocalPath string - CreatorID UserID - SiteID UserID + CreatorID UserID + SiteID UserID - HasCard bool - HasThumbnail bool + HasCard bool + HasThumbnail bool IsContentDownloaded bool } @@ -86,7 +86,7 @@ func TryParseTweetUrl(url string) (UserHandle, TweetID, bool) { if matches == nil { return UserHandle(""), TweetID(0), false } - if len(matches) != 3 { // matches[0] is the full string + if len(matches) != 3 { // matches[0] is the full string panic(matches) } return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true diff --git a/scraper/url_test.go b/scraper/url_test.go index e186935..689e06e 100644 --- a/scraper/url_test.go +++ b/scraper/url_test.go @@ -1,153 +1,153 @@ package scraper_test import ( - "testing" - "os" - "encoding/json" + "encoding/json" + "os" + "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - . "offline_twitter/scraper" + . "offline_twitter/scraper" ) func TestParseAPIUrlCard(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/url_card.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/url_card.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - url := ParseAPIUrlCard(apiCard) - assert.Equal("reason.com", url.Domain) - assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title) - assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned " + - "resilience.\"", url.Description) - assert.Equal(600, url.ThumbnailWidth) - assert.Equal(315, url.ThumbnailHeight) - assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl) - assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath) - assert.Equal(UserID(155581583), url.CreatorID) - assert.Equal(UserID(16467567), url.SiteID) - assert.True(url.HasThumbnail) - assert.False(url.IsContentDownloaded) + url := ParseAPIUrlCard(apiCard) + assert.Equal("reason.com", url.Domain) + assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title) + assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+ + "resilience.\"", url.Description) + assert.Equal(600, url.ThumbnailWidth) + assert.Equal(315, url.ThumbnailHeight) + assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl) + assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath) + assert.Equal(UserID(155581583), url.CreatorID) + assert.Equal(UserID(16467567), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithPlayer(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - url := ParseAPIUrlCard(apiCard) - assert.Equal("www.youtube.com", url.Domain) - assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title) - assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8" + - "Watch this episode on Rumble: https://rumble...", url.Description) - assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl) - assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath) - assert.Equal(UserID(10228272), url.SiteID) - assert.True(url.HasThumbnail) - assert.False(url.IsContentDownloaded) + url := ParseAPIUrlCard(apiCard) + assert.Equal("www.youtube.com", url.Domain) + assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title) + assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+ + "Watch this episode on Rumble: https://rumble...", url.Description) + assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl) + assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath) + assert.Equal(UserID(10228272), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - url := ParseAPIUrlCard(apiCard) - assert.Equal("www.youtube.com", url.Domain) - assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title) - assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________" + - "__________________________________________...", url.Description) - assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl) - assert.Equal("player-placeholder.png", url.ThumbnailLocalPath) - assert.Equal(UserID(10228272), url.SiteID) - assert.True(url.HasThumbnail) - assert.False(url.IsContentDownloaded) + url := ParseAPIUrlCard(apiCard) + assert.Equal("www.youtube.com", url.Domain) + assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title) + assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+ + "__________________________________________...", url.Description) + assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl) + assert.Equal("player-placeholder.png", url.ThumbnailLocalPath) + assert.Equal(UserID(10228272), url.SiteID) + assert.True(url.HasThumbnail) + assert.False(url.IsContentDownloaded) } func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") - if err != nil { - panic(err) - } - var apiCard APICard - err = json.Unmarshal(data, &apiCard) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") + if err != nil { + panic(err) + } + var apiCard APICard + err = json.Unmarshal(data, &apiCard) + require.NoError(t, err) - url := ParseAPIUrlCard(apiCard) - assert.Equal("en.m.wikipedia.org", url.Domain) - assert.Equal("Entryism - Wikipedia", url.Title) - assert.Equal("", url.Description) - assert.True(url.HasCard) - assert.False(url.HasThumbnail) + url := ParseAPIUrlCard(apiCard) + assert.Equal("en.m.wikipedia.org", url.Domain) + assert.Equal("Entryism - Wikipedia", url.Title) + assert.Equal("", url.Description) + assert.True(url.HasCard) + assert.False(url.HasThumbnail) } /** * Should check if a url is a tweet url, and if so, parse it */ func TestParseTweetUrl(t *testing.T) { - assert:= assert.New(t) + assert := assert.New(t) - // Test valid tweet url - url := "https://twitter.com/kanesays23/status/1429583672827465730" - handle, id, is_ok := TryParseTweetUrl(url) - assert.True(is_ok) - assert.Equal(UserHandle("kanesays23"), handle) - assert.Equal(TweetID(1429583672827465730), id) + // Test valid tweet url + url := "https://twitter.com/kanesays23/status/1429583672827465730" + handle, id, is_ok := TryParseTweetUrl(url) + assert.True(is_ok) + assert.Equal(UserHandle("kanesays23"), handle) + assert.Equal(TweetID(1429583672827465730), id) - // Test url with GET params - handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") - assert.True(is_ok) - assert.Equal(UserHandle("NerdNoticing"), handle) - assert.Equal(TweetID(1263192389050654720), id) + // Test url with GET params + handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") + assert.True(is_ok) + assert.Equal(UserHandle("NerdNoticing"), handle) + assert.Equal(TweetID(1263192389050654720), id) - // Test invalid url - _, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") - assert.False(is_ok) + // Test invalid url + _, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") + assert.False(is_ok) - // Test empty string - _, _, is_ok = TryParseTweetUrl("") - assert.False(is_ok) + // Test empty string + _, _, is_ok = TryParseTweetUrl("") + assert.False(is_ok) } /** * Should extract a user handle from a tweet URL, or fail if URL is invalid */ func TestParseHandleFromTweetUrl(t *testing.T) { - assert := assert.New(t) + assert := assert.New(t) - // Test valid tweet url - url := "https://twitter.com/kanesays23/status/1429583672827465730" - result, err := ParseHandleFromTweetUrl(url) - assert.NoError(err) - assert.Equal(UserHandle("kanesays23"), result) + // Test valid tweet url + url := "https://twitter.com/kanesays23/status/1429583672827465730" + result, err := ParseHandleFromTweetUrl(url) + assert.NoError(err) + assert.Equal(UserHandle("kanesays23"), result) - // Test url with GET params - result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") - assert.NoError(err) - assert.Equal(UserHandle("NerdNoticing"), result) + // Test url with GET params + result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") + assert.NoError(err) + assert.Equal(UserHandle("NerdNoticing"), result) - // Test invalid url - _, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") - assert.Error(err) + // Test invalid url + _, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") + assert.Error(err) - // Test empty string - _, err = ParseHandleFromTweetUrl("") - assert.Error(err) + // Test empty string + _, err = ParseHandleFromTweetUrl("") + assert.Error(err) } diff --git a/scraper/user.go b/scraper/user.go index 8f941cf..a5ed2fd 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -1,12 +1,12 @@ package scraper import ( - "fmt" - "strings" - "regexp" - "path" + "fmt" + "path" + "regexp" + "strings" - "offline_twitter/terminal_utils" + "offline_twitter/terminal_utils" ) const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" @@ -15,47 +15,48 @@ type UserID int64 type UserHandle string func JoinArrayOfHandles(handles []UserHandle) string { - ret := []string{} - for _, h := range handles { - ret = append(ret, string(h)) - } - return strings.Join(ret, ",") + ret := []string{} + for _, h := range handles { + ret = append(ret, string(h)) + } + return strings.Join(ret, ",") } type User struct { - ID UserID - DisplayName string - Handle UserHandle - Bio string - FollowingCount int - FollowersCount int - Location string - Website string - JoinDate Timestamp - IsPrivate bool - IsVerified bool - IsBanned bool - ProfileImageUrl string - ProfileImageLocalPath string - BannerImageUrl string - BannerImageLocalPath string + ID UserID + DisplayName string + Handle UserHandle + Bio string + FollowingCount int + FollowersCount int + Location string + Website string + JoinDate Timestamp + IsPrivate bool + IsVerified bool + IsBanned bool + IsDeleted bool + ProfileImageUrl string + ProfileImageLocalPath string + BannerImageUrl string + BannerImageLocalPath string - PinnedTweetID TweetID - PinnedTweet *Tweet + PinnedTweetID TweetID + PinnedTweet *Tweet - IsFollowed bool - IsContentDownloaded bool - IsNeedingFakeID bool - IsIdFake bool + IsFollowed bool + IsContentDownloaded bool + IsNeedingFakeID bool + IsIdFake bool } func (u User) String() string { - var verified string - if u.IsVerified { - verified = "[\u2713]" - } - ret := fmt.Sprintf( -`%s%s + var verified string + if u.IsVerified { + verified = "[\u2713]" + } + ret := fmt.Sprintf( + `%s%s @%s %s @@ -65,115 +66,112 @@ Joined %s %s %s `, - u.DisplayName, - verified, - u.Handle, - terminal_utils.WrapText(u.Bio, 60), - u.FollowingCount, - u.FollowersCount, - terminal_utils.FormatDate(u.JoinDate.Time), - u.Location, - u.Website, - ) - if u.PinnedTweet != nil { - ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60) - } else { - println("Pinned tweet id:", u.PinnedTweetID) - } - return ret + u.DisplayName, + verified, + u.Handle, + terminal_utils.WrapText(u.Bio, 60), + u.FollowingCount, + u.FollowersCount, + terminal_utils.FormatDate(u.JoinDate.Time), + u.Location, + u.Website, + ) + if u.PinnedTweet != nil { + ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60) + } else { + println("Pinned tweet id:", u.PinnedTweetID) + } + return ret } - - /** * Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user` * subcommand or as part of tombstone user fetching.) */ func GetUnknownUserWithHandle(handle UserHandle) User { - return User{ - ID: UserID(0), // 2^62 + 1... - DisplayName: string(handle), - Handle: handle, - Bio: "", - FollowersCount: 0, - FollowingCount: 0, - Location: "", - Website:"", - JoinDate: TimestampFromUnix(0), - IsVerified: false, - IsPrivate: false, - IsNeedingFakeID: true, - IsIdFake: true, - } + return User{ + ID: UserID(0), // 2^62 + 1... + DisplayName: string(handle), + Handle: handle, + Bio: "", + FollowersCount: 0, + FollowingCount: 0, + Location: "", + Website: "", + JoinDate: TimestampFromUnix(0), + IsVerified: false, + IsPrivate: false, + IsNeedingFakeID: true, + IsIdFake: true, + } } // Turn an APIUser, as returned from the scraper, into a properly structured User object func ParseSingleUser(apiUser APIUser) (ret User, err error) { - if apiUser.DoesntExist { - // User may have been deleted, or there was a typo. There's no data to parse - if apiUser.ScreenName == "" { - panic("ScreenName is empty!") - } - ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName)) - return - } - ret.ID = UserID(apiUser.ID) - ret.Handle = UserHandle(apiUser.ScreenName) - if apiUser.IsBanned { - // Banned users won't have any further info, so just return here - ret.IsBanned = true - return - } - ret.DisplayName = apiUser.Name - ret.Bio = apiUser.Description - ret.FollowingCount = apiUser.FriendsCount - ret.FollowersCount = apiUser.FollowersCount - ret.Location = apiUser.Location - if len(apiUser.Entities.URL.Urls) > 0 { - ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL - } - ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt) - if err != nil { - err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err) - return - } - ret.IsPrivate = apiUser.Protected - ret.IsVerified = apiUser.Verified - ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS + if apiUser.DoesntExist { + // User may have been deleted, or there was a typo. There's no data to parse + if apiUser.ScreenName == "" { + panic("ScreenName is empty!") + } + ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName)) + return + } + ret.ID = UserID(apiUser.ID) + ret.Handle = UserHandle(apiUser.ScreenName) + if apiUser.IsBanned { + // Banned users won't have any further info, so just return here + ret.IsBanned = true + return + } + ret.DisplayName = apiUser.Name + ret.Bio = apiUser.Description + ret.FollowingCount = apiUser.FriendsCount + ret.FollowersCount = apiUser.FollowersCount + ret.Location = apiUser.Location + if len(apiUser.Entities.URL.Urls) > 0 { + ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL + } + ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt) + if err != nil { + err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err) + return + } + ret.IsPrivate = apiUser.Protected + ret.IsVerified = apiUser.Verified + ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS + if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) { + ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".") + } + ret.BannerImageUrl = apiUser.ProfileBannerURL - if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) { - ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".") - } - ret.BannerImageUrl = apiUser.ProfileBannerURL + ret.ProfileImageLocalPath = ret.compute_profile_image_local_path() + ret.BannerImageLocalPath = ret.compute_banner_image_local_path() - ret.ProfileImageLocalPath = ret.compute_profile_image_local_path() - ret.BannerImageLocalPath = ret.compute_banner_image_local_path() - - if len(apiUser.PinnedTweetIdsStr) > 0 { - ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0])) - } - return + if len(apiUser.PinnedTweetIdsStr) > 0 { + ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0])) + } + return } // Calls API#GetUser and returns the parsed result func GetUser(handle UserHandle) (User, error) { - api := API{} - apiUser, err := api.GetUser(handle) - if apiUser.ScreenName == "" { - apiUser.ScreenName = string(handle) - } - if err != nil { - return User{}, err - } - return ParseSingleUser(apiUser) + api := API{} + apiUser, err := api.GetUser(handle) + if apiUser.ScreenName == "" { + apiUser.ScreenName = string(handle) + } + if err != nil { + return User{}, err + } + return ParseSingleUser(apiUser) } /** * Make a filename for the profile image, that hopefully won't clobber other ones */ func (u User) compute_profile_image_local_path() string { - return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl) + return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl) } /** @@ -182,34 +180,34 @@ func (u User) compute_profile_image_local_path() string { * If there is no banner image, just return nothing. */ func (u User) compute_banner_image_local_path() string { - if u.BannerImageUrl == "" { - return "" - } - base_name := path.Base(u.BannerImageUrl) + if u.BannerImageUrl == "" { + return "" + } + base_name := path.Base(u.BannerImageUrl) - // Check if it has an extension (e.g., ".png" or ".jpeg") - if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) { - // If it doesn't have an extension, add one - base_name += ".jpg" - } - return string(u.Handle) + "_banner_" + base_name + // Check if it has an extension (e.g., ".png" or ".jpeg") + if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) { + // If it doesn't have an extension, add one + base_name += ".jpg" + } + return string(u.Handle) + "_banner_" + base_name } /** * Get the URL where we would expect to find a User's tiny profile image */ func (u User) GetTinyProfileImageUrl() string { - // If profile image is empty, then just use the default profile image - if u.ProfileImageUrl == "" { - return DEFAULT_PROFILE_IMAGE_URL - } + // If profile image is empty, then just use the default profile image + if u.ProfileImageUrl == "" { + return DEFAULT_PROFILE_IMAGE_URL + } - // Check that the format is as expected - r := regexp.MustCompile(`(\.\w{2,4})$`) - if !r.MatchString(u.ProfileImageUrl) { - panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl)) - } - return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1") + // Check that the format is as expected + r := regexp.MustCompile(`(\.\w{2,4})$`) + if !r.MatchString(u.ProfileImageUrl) { + panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl)) + } + return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1") } /** @@ -217,8 +215,8 @@ func (u User) GetTinyProfileImageUrl() string { * If user has a blank or default profile image, return a non-personalized default path. */ func (u User) GetTinyProfileImageLocalPath() string { - if u.ProfileImageUrl == "" { - return path.Base(u.GetTinyProfileImageUrl()) - } - return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl()) + if u.ProfileImageUrl == "" { + return path.Base(u.GetTinyProfileImageUrl()) + } + return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl()) } diff --git a/scraper/user_feed.go b/scraper/user_feed.go index aeffdb5..4e75068 100644 --- a/scraper/user_feed.go +++ b/scraper/user_feed.go @@ -1,8 +1,8 @@ package scraper import ( - "fmt" "errors" + "fmt" ) /** @@ -33,7 +33,6 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error return ParseTweetResponse(tweet_response) } - func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { api := API{} api_response, err := api.GetGraphqlFeedFor(user_id, "") diff --git a/scraper/user_test.go b/scraper/user_test.go index 05e9d09..1a66072 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -1,14 +1,14 @@ package scraper_test import ( - "testing" "encoding/json" - "os" "net/http" + "os" + "testing" "github.com/jarcoal/httpmock" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" . "offline_twitter/scraper" ) @@ -31,7 +31,7 @@ func TestParseSingleUser(t *testing.T) { assert.Equal(UserID(44067298), user.ID) assert.Equal("Michael Malice", user.DisplayName) assert.Equal(UserHandle("michaelmalice"), user.Handle) - assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by " + + assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+ "Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio) assert.Equal(941, user.FollowingCount) assert.Equal(208589, user.FollowersCount) @@ -39,7 +39,7 @@ func TestParseSingleUser(t *testing.T) { assert.Equal("https://amzn.to/3oInafv", user.Website) assert.Equal(int64(1243920952), user.JoinDate.Unix()) assert.False(user.IsPrivate) - assert.True (user.IsVerified) + assert.True(user.IsVerified) assert.False(user.IsBanned) assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl) assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl()) @@ -90,7 +90,7 @@ func TestParseDeletedUser(t *testing.T) { handle := "Some Random Deleted User" apiUser := user_resp.ConvertToAPIUser() - apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway + apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway user, err := ParseSingleUser(apiUser) require.NoError(t, err) diff --git a/scraper/video.go b/scraper/video.go index e2179f8..ff0380c 100644 --- a/scraper/video.go +++ b/scraper/video.go @@ -1,9 +1,9 @@ package scraper import ( - "fmt" - "sort" - "path" + "fmt" + "path" + "sort" ) type VideoID int64 @@ -12,61 +12,61 @@ type VideoID int64 // from someone else). type Video struct { - ID VideoID - TweetID TweetID - Width int - Height int - RemoteURL string - LocalFilename string + ID VideoID + TweetID TweetID + Width int + Height int + RemoteURL string + LocalFilename string - ThumbnailRemoteUrl string - ThumbnailLocalPath string `db:"thumbnail_local_filename"` - Duration int // milliseconds - ViewCount int + ThumbnailRemoteUrl string + ThumbnailLocalPath string `db:"thumbnail_local_filename"` + Duration int // milliseconds + ViewCount int - IsDownloaded bool - IsGif bool + IsDownloaded bool + IsGif bool } func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { - variants := apiVideo.VideoInfo.Variants - sort.Sort(variants) + variants := apiVideo.VideoInfo.Variants + sort.Sort(variants) - var view_count int + var view_count int - r := apiVideo.Ext.MediaStats.R + r := apiVideo.Ext.MediaStats.R - switch r.(type) { - case string: - view_count = 0 - case map[string]interface{}: - OK_entry, ok := r.(map[string]interface{})["ok"] - if !ok { - panic("No 'ok' value found in the R!") - } - view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"] - view_count = int_or_panic(view_count_str.(string)) - if !ok { - panic("No 'viewCount' value found in the OK!") - } - } + switch r.(type) { + case string: + view_count = 0 + case map[string]interface{}: + OK_entry, ok := r.(map[string]interface{})["ok"] + if !ok { + panic("No 'ok' value found in the R!") + } + view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"] + view_count = int_or_panic(view_count_str.(string)) + if !ok { + panic("No 'viewCount' value found in the OK!") + } + } - local_filename := fmt.Sprintf("%d.mp4", tweet_id) + local_filename := fmt.Sprintf("%d.mp4", tweet_id) - return Video{ - ID: VideoID(apiVideo.ID), - TweetID: tweet_id, - Width: apiVideo.OriginalInfo.Width, - Height: apiVideo.OriginalInfo.Height, - RemoteURL: variants[0].URL, - LocalFilename: local_filename, + return Video{ + ID: VideoID(apiVideo.ID), + TweetID: tweet_id, + Width: apiVideo.OriginalInfo.Width, + Height: apiVideo.OriginalInfo.Height, + RemoteURL: variants[0].URL, + LocalFilename: local_filename, - ThumbnailRemoteUrl: apiVideo.MediaURLHttps, - ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps), - Duration: apiVideo.VideoInfo.Duration, - ViewCount: view_count, + ThumbnailRemoteUrl: apiVideo.MediaURLHttps, + ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps), + Duration: apiVideo.VideoInfo.Duration, + ViewCount: view_count, - IsDownloaded: false, - IsGif: apiVideo.Type == "animated_gif", - } + IsDownloaded: false, + IsGif: apiVideo.Type == "animated_gif", + } } diff --git a/scraper/video_test.go b/scraper/video_test.go index 79adce8..5ef2ea4 100644 --- a/scraper/video_test.go +++ b/scraper/video_test.go @@ -1,37 +1,37 @@ package scraper_test import ( - "testing" - "os" - "encoding/json" + "encoding/json" + "os" + "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - . "offline_twitter/scraper" + . "offline_twitter/scraper" ) func TestParseAPIVideo(t *testing.T) { - assert := assert.New(t) - data, err := os.ReadFile("test_responses/tweet_content/video.json") - if err != nil { - panic(err) - } - var apivideo APIExtendedMedia - err = json.Unmarshal(data, &apivideo) - require.NoError(t, err) + assert := assert.New(t) + data, err := os.ReadFile("test_responses/tweet_content/video.json") + if err != nil { + panic(err) + } + var apivideo APIExtendedMedia + err = json.Unmarshal(data, &apivideo) + require.NoError(t, err) - tweet_id := TweetID(28) - video := ParseAPIVideo(apivideo, tweet_id) - assert.Equal(VideoID(1418951950020845568), video.ID) - assert.Equal(tweet_id, video.TweetID) - assert.Equal(1280, video.Height) - assert.Equal(720, video.Width) - assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL) - assert.Equal("28.mp4", video.LocalFilename) - assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl) - assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath) - assert.Equal(275952, video.ViewCount) - assert.Equal(88300, video.Duration) - assert.False(video.IsDownloaded) + tweet_id := TweetID(28) + video := ParseAPIVideo(apivideo, tweet_id) + assert.Equal(VideoID(1418951950020845568), video.ID) + assert.Equal(tweet_id, video.TweetID) + assert.Equal(1280, video.Height) + assert.Equal(720, video.Width) + assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL) + assert.Equal("28.mp4", video.LocalFilename) + assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl) + assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath) + assert.Equal(275952, video.ViewCount) + assert.Equal(88300, video.Duration) + assert.False(video.IsDownloaded) }