Add 'gofmt' linter

This commit is contained in:
Alessio 2022-03-13 17:09:43 -07:00
parent 223734d001
commit d1d80a91cd
30 changed files with 714 additions and 733 deletions

View File

@ -27,6 +27,7 @@ linters:
- wrapcheck - wrapcheck
- lll - lll
- godox - godox
- gofmt
- errorlint - errorlint
- nolintlint - nolintlint
@ -203,9 +204,9 @@ linters-settings:
keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting
- XXX - XXX
# gofmt: gofmt:
# # simplify code: gofmt with `-s` option, true by default # simplify code: gofmt with `-s` option, true by default
# simplify: true simplify: true
# gofumpt: # gofumpt:
# # Select the Go version to target. The default is `1.15`. # # Select the Go version to target. The default is `1.15`.

View File

@ -6,9 +6,9 @@ import (
"fmt" "fmt"
"math/rand" "math/rand"
"github.com/go-test/deep"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/go-test/deep"
"offline_twitter/scraper" "offline_twitter/scraper"
) )
@ -76,7 +76,7 @@ func TestModifyUser(t *testing.T) {
fake_user.FollowersCount = 2000 fake_user.FollowersCount = 2000
fake_user.JoinDate = scraper.TimestampFromUnix(2000) fake_user.JoinDate = scraper.TimestampFromUnix(2000)
fake_user.ProfileImageUrl = "asdf2" fake_user.ProfileImageUrl = "asdf2"
fake_user.IsContentDownloaded = false // test No Worsening fake_user.IsContentDownloaded = false // test No Worsening
// Save the modified user // Save the modified user
err = profile.SaveUser(&fake_user) err = profile.SaveUser(&fake_user)

View File

@ -3,9 +3,9 @@ package persistence
import ( import (
"errors" "errors"
"fmt" "fmt"
"os"
"regexp" "regexp"
"strings" "strings"
"os"
) )
var NotInDatabase = errors.New("Not in database") var NotInDatabase = errors.New("Not in database")
@ -35,7 +35,7 @@ func file_exists(path string) bool {
* https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250 * https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250
*/ */
func ToSnakeCase(str string) string { func ToSnakeCase(str string) string {
snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}") snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}")
snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}") snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake) return strings.ToLower(snake)
} }

View File

@ -70,13 +70,13 @@ func create_stable_user() scraper.User {
func create_image_from_id(id int) scraper.Image { func create_image_from_id(id int) scraper.Image {
filename := fmt.Sprintf("image%d.jpg", id) filename := fmt.Sprintf("image%d.jpg", id)
return scraper.Image{ return scraper.Image{
ID: scraper.ImageID(id), ID: scraper.ImageID(id),
TweetID: -1, TweetID: -1,
Width: id * 10, Width: id * 10,
Height: id * 5, Height: id * 5,
RemoteURL: filename, RemoteURL: filename,
LocalFilename: filename, LocalFilename: filename,
IsDownloaded: false, IsDownloaded: false,
} }
} }
@ -86,18 +86,18 @@ func create_image_from_id(id int) scraper.Image {
func create_video_from_id(id int) scraper.Video { func create_video_from_id(id int) scraper.Video {
filename := fmt.Sprintf("video%d.jpg", id) filename := fmt.Sprintf("video%d.jpg", id)
return scraper.Video{ return scraper.Video{
ID: scraper.VideoID(id), ID: scraper.VideoID(id),
TweetID: -1, TweetID: -1,
Width: id * 10, Width: id * 10,
Height: id * 5, Height: id * 5,
RemoteURL: filename, RemoteURL: filename,
LocalFilename: filename, LocalFilename: filename,
ThumbnailRemoteUrl: filename, ThumbnailRemoteUrl: filename,
ThumbnailLocalPath: filename, ThumbnailLocalPath: filename,
Duration: 10000, Duration: 10000,
ViewCount: 200, ViewCount: 200,
IsDownloaded: false, IsDownloaded: false,
IsGif: false, IsGif: false,
} }
} }
@ -265,13 +265,13 @@ func create_dummy_tombstone() scraper.Tweet {
tweet_id := scraper.TweetID(rand.Int()) tweet_id := scraper.TweetID(rand.Int())
return scraper.Tweet{ return scraper.Tweet{
ID: tweet_id, ID: tweet_id,
UserID: -1, UserID: -1,
TombstoneType: "deleted", TombstoneType: "deleted",
IsStub: true, IsStub: true,
Mentions: []scraper.UserHandle{}, Mentions: []scraper.UserHandle{},
ReplyMentions: []scraper.UserHandle{}, ReplyMentions: []scraper.UserHandle{},
Hashtags: []string{}, Hashtags: []string{},
} }
} }

View File

@ -7,29 +7,27 @@ import (
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
) )
const ENGINE_DATABASE_VERSION = 11 const ENGINE_DATABASE_VERSION = 11
type VersionMismatchError struct { type VersionMismatchError struct {
EngineVersion int EngineVersion int
DatabaseVersion int DatabaseVersion int
} }
func (e VersionMismatchError) Error() string { func (e VersionMismatchError) Error() string {
return fmt.Sprintf( return fmt.Sprintf(
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d. `This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`, Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`,
e.DatabaseVersion, e.EngineVersion, e.DatabaseVersion, e.EngineVersion,
) )
} }
/** /**
* The Nth entry is the migration that moves you from version N to version N+1. * The Nth entry is the migration that moves you from version N to version N+1.
* `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`. * `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`.
*/ */
var MIGRATIONS = []string{ var MIGRATIONS = []string{
`create table polls (rowid integer primary key, `create table polls (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'), id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null, tweet_id integer not null,
num_choices integer not null, num_choices integer not null,
@ -50,25 +48,25 @@ var MIGRATIONS = []string{
foreign key(tweet_id) references tweets(id) foreign key(tweet_id) references tweets(id)
);`, );`,
`alter table tweets add column is_conversation_scraped boolean default 0; `alter table tweets add column is_conversation_scraped boolean default 0;
alter table tweets add column last_scraped_at integer not null default 0`, alter table tweets add column last_scraped_at integer not null default 0`,
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2; `update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
insert into tombstone_types (rowid, short_name, tombstone_text) insert into tombstone_types (rowid, short_name, tombstone_text)
values (5, 'violated', 'This Tweet violated the Twitter Rules'), values (5, 'violated', 'This Tweet violated the Twitter Rules'),
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`, (6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
`alter table videos add column thumbnail_remote_url text not null default "missing"; `alter table videos add column thumbnail_remote_url text not null default "missing";
alter table videos add column thumbnail_local_filename text not null default "missing"`, alter table videos add column thumbnail_local_filename text not null default "missing"`,
`alter table videos add column duration integer not null default 0; `alter table videos add column duration integer not null default 0;
alter table videos add column view_count integer not null default 0`, alter table videos add column view_count integer not null default 0`,
`alter table users add column is_banned boolean default 0`, `alter table users add column is_banned boolean default 0`,
`alter table urls add column short_text text not null default ""`, `alter table urls add column short_text text not null default ""`,
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. ' `insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|| 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`, || 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
`alter table users add column is_followed boolean default 0`, `alter table users add column is_followed boolean default 0`,
`create table fake_user_sequence(latest_fake_id integer not null); `create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000); insert into fake_user_sequence values(0x4000000000000000);
alter table users add column is_id_fake boolean default 0;`, alter table users add column is_id_fake boolean default 0;`,
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like `delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`, 'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`,
} }

View File

@ -2,12 +2,13 @@ package persistence_test
import ( import (
"testing" "testing"
"os" "os"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"offline_twitter/scraper"
"offline_twitter/persistence" "offline_twitter/persistence"
"offline_twitter/scraper"
) )
func TestVersionUpgrade(t *testing.T) { func TestVersionUpgrade(t *testing.T) {
@ -25,7 +26,7 @@ func TestVersionUpgrade(t *testing.T) {
require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet") require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration) persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration)
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION + 1) err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION+1)
require.NoError(err) require.NoError(err)
require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't") require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't")

View File

@ -5,8 +5,8 @@ import (
) )
var ( var (
END_OF_FEED = fmt.Errorf("End of feed") END_OF_FEED = fmt.Errorf("End of feed")
DOESNT_EXIST = fmt.Errorf("Doesn't exist") DOESNT_EXIST = fmt.Errorf("Doesn't exist")
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API") EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API") API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
) )

View File

@ -1,33 +1,33 @@
package scraper package scraper
import ( import (
"encoding/json"
"fmt" "fmt"
"html" "html"
"time"
"strings"
"encoding/json"
"strconv"
"sort" "sort"
"strconv"
"strings"
"time"
) )
type APIMedia struct { type APIMedia struct {
ID int64 `json:"id_str,string"` ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"` MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"` Type string `json:"type"`
URL string `json:"url"` URL string `json:"url"`
OriginalInfo struct { OriginalInfo struct {
Width int `json:"width"` Width int `json:"width"`
Height int `json:"height"` Height int `json:"height"`
} `json:"original_info"` } `json:"original_info"`
} }
type SortableVariants []struct { type SortableVariants []struct {
Bitrate int `json:"bitrate,omitempty"` Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"` URL string `json:"url"`
} }
func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] } func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate } func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
type APIExtendedMedia struct { type APIExtendedMedia struct {
@ -35,12 +35,12 @@ type APIExtendedMedia struct {
MediaURLHttps string `json:"media_url_https"` MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"` Type string `json:"type"`
VideoInfo struct { VideoInfo struct {
Variants SortableVariants `json:"variants"` Variants SortableVariants `json:"variants"`
Duration int `json:"duration_millis"` Duration int `json:"duration_millis"`
} `json:"video_info"` } `json:"video_info"`
OriginalInfo struct { OriginalInfo struct {
Width int `json:"width"` Width int `json:"width"`
Height int `json:"height"` Height int `json:"height"`
} `json:"original_info"` } `json:"original_info"`
Ext struct { Ext struct {
MediaStats struct { MediaStats struct {
@ -74,9 +74,9 @@ type APICard struct {
} `json:"description"` } `json:"description"`
Thumbnail struct { Thumbnail struct {
ImageValue struct { ImageValue struct {
Url string `json:"url"` Url string `json:"url"`
Width int `json:"width"` Width int `json:"width"`
Height int `json:"height"` Height int `json:"height"`
} `json:"image_value"` } `json:"image_value"`
} `json:"thumbnail_image_large"` } `json:"thumbnail_image_large"`
PlayerImage struct { PlayerImage struct {
@ -128,18 +128,18 @@ type APICard struct {
} }
type APITweet struct { type APITweet struct {
ID int64 `json:"id_str,string"` ID int64 `json:"id_str,string"`
ConversationID int64 `json:"conversation_id_str,string"` ConversationID int64 `json:"conversation_id_str,string"`
CreatedAt string `json:"created_at"` CreatedAt string `json:"created_at"`
FavoriteCount int `json:"favorite_count"` FavoriteCount int `json:"favorite_count"`
FullText string `json:"full_text"` FullText string `json:"full_text"`
DisplayTextRange []int `json:"display_text_range"` DisplayTextRange []int `json:"display_text_range"`
Entities struct { Entities struct {
Hashtags []struct { Hashtags []struct {
Text string `json:"text"` Text string `json:"text"`
} `json:"hashtags"` } `json:"hashtags"`
Media []APIMedia `json:"media"` Media []APIMedia `json:"media"`
URLs []struct { URLs []struct {
ExpandedURL string `json:"expanded_url"` ExpandedURL string `json:"expanded_url"`
ShortenedUrl string `json:"url"` ShortenedUrl string `json:"url"`
} `json:"urls"` } `json:"urls"`
@ -147,30 +147,30 @@ type APITweet struct {
UserName string `json:"screen_name"` UserName string `json:"screen_name"`
UserID int64 `json:"id_str,string"` UserID int64 `json:"id_str,string"`
} `json:"user_mentions"` } `json:"user_mentions"`
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange" ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
} `json:"entities"` } `json:"entities"`
ExtendedEntities struct { ExtendedEntities struct {
Media []APIExtendedMedia `json:"media"` Media []APIExtendedMedia `json:"media"`
} `json:"extended_entities"` } `json:"extended_entities"`
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"` InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"` InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"` InReplyToScreenName string `json:"in_reply_to_screen_name"`
ReplyCount int `json:"reply_count"` ReplyCount int `json:"reply_count"`
RetweetCount int `json:"retweet_count"` RetweetCount int `json:"retweet_count"`
QuoteCount int `json:"quote_count"` QuoteCount int `json:"quote_count"`
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
RetweetedStatusID int64 RetweetedStatusID int64
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
QuotedStatusID int64 QuotedStatusID int64
QuotedStatusPermalink struct { QuotedStatusPermalink struct {
ShortURL string `json:"url"` ShortURL string `json:"url"`
ExpandedURL string `json:"expanded"` ExpandedURL string `json:"expanded"`
} `json:"quoted_status_permalink"` } `json:"quoted_status_permalink"`
Time time.Time `json:"time"` Time time.Time `json:"time"`
UserID int64 `json:"user_id_str,string"` UserID int64 `json:"user_id_str,string"`
UserHandle string UserHandle string
Card APICard `json:"card"` Card APICard `json:"card"`
TombstoneText string TombstoneText string
} }
func (t *APITweet) NormalizeContent() { func (t *APITweet) NormalizeContent() {
@ -183,7 +183,7 @@ func (t *APITweet) NormalizeContent() {
t.RetweetedStatusID = int64(id) t.RetweetedStatusID = int64(id)
} }
if (len(t.DisplayTextRange) == 2) { if len(t.DisplayTextRange) == 2 {
t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]])) t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]]) t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
} }
@ -217,7 +217,6 @@ func (t APITweet) String() string {
return string(data) return string(data)
} }
type APIUser struct { type APIUser struct {
CreatedAt string `json:"created_at"` CreatedAt string `json:"created_at"`
Description string `json:"description"` Description string `json:"description"`
@ -235,7 +234,7 @@ type APIUser struct {
ListedCount int `json:"listed_count"` ListedCount int `json:"listed_count"`
Name string `json:"name"` Name string `json:"name"`
Location string `json:"location"` Location string `json:"location"`
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
ProfileBannerURL string `json:"profile_banner_url"` ProfileBannerURL string `json:"profile_banner_url"`
ProfileImageURLHTTPS string `json:"profile_image_url_https"` ProfileImageURLHTTPS string `json:"profile_image_url_https"`
Protected bool `json:"protected"` Protected bool `json:"protected"`
@ -246,7 +245,6 @@ type APIUser struct {
DoesntExist bool DoesntExist bool
} }
type UserResponse struct { type UserResponse struct {
Data struct { Data struct {
User struct { User struct {
@ -255,11 +253,12 @@ type UserResponse struct {
} `json:"user"` } `json:"user"`
} `json:"data"` } `json:"data"`
Errors []struct { Errors []struct {
Message string `json:"message"` Message string `json:"message"`
Name string `json:"name"` Name string `json:"name"`
Code int `json:"code"` Code int `json:"code"`
} `json:"errors"` } `json:"errors"`
} }
func (u UserResponse) ConvertToAPIUser() APIUser { func (u UserResponse) ConvertToAPIUser() APIUser {
ret := u.Data.User.Legacy ret := u.Data.User.Legacy
ret.ID = u.Data.User.ID ret.ID = u.Data.User.ID
@ -279,9 +278,9 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
} }
type Entry struct { type Entry struct {
EntryID string `json:"entryId"` EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"` SortIndex int64 `json:"sortIndex,string"`
Content struct { Content struct {
Item struct { Item struct {
Content struct { Content struct {
Tombstone struct { Tombstone struct {
@ -303,12 +302,15 @@ type Entry struct {
} `json:"operation"` } `json:"operation"`
} `json:"content"` } `json:"content"`
} }
func (e Entry) GetTombstoneText() string { func (e Entry) GetTombstoneText() string {
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
} }
type SortableEntries []Entry type SortableEntries []Entry
func (e SortableEntries) Len() int { return len(e) }
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } func (e SortableEntries) Len() int { return len(e) }
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex } func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
type TweetResponse struct { type TweetResponse struct {
@ -329,15 +331,16 @@ type TweetResponse struct {
} }
var tombstone_types = map[string]string{ var tombstone_types = map[string]string{
"This Tweet was deleted by the Tweet author. Learn more": "deleted", "This Tweet was deleted by the Tweet author. Learn more": "deleted",
"This Tweet is from a suspended account. Learn more": "suspended", "This Tweet is from a suspended account. Learn more": "suspended",
"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden", "You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden",
"This Tweet is unavailable. Learn more": "unavailable", "This Tweet is unavailable. Learn more": "unavailable",
"This Tweet violated the Twitter Rules. Learn more": "violated", "This Tweet violated the Twitter Rules. Learn more": "violated",
"This Tweet is from an account that no longer exists. Learn more": "no longer exists", "This Tweet is from an account that no longer exists. Learn more": "no longer exists",
"Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " + "Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " +
"you’ll need to log in to Twitter. Learn more": "age-restricted", "you’ll need to log in to Twitter. Learn more": "age-restricted",
} }
/** /**
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to * Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
* be fetched for tombstones. * be fetched for tombstones.
@ -379,8 +382,8 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
if entry.GetTombstoneText() != "" { if entry.GetTombstoneText() != "" {
// Try to reconstruct the tombstone tweet // Try to reconstruct the tombstone tweet
var tombstoned_tweet APITweet var tombstoned_tweet APITweet
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
if i + 1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 { if i+1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID
api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)] api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)]
if !ok { if !ok {
@ -390,7 +393,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
tombstoned_tweet.UserID = api_tweet.InReplyToUserID tombstoned_tweet.UserID = api_tweet.InReplyToUserID
ret = append(ret, UserHandle(api_tweet.InReplyToScreenName)) ret = append(ret, UserHandle(api_tweet.InReplyToScreenName))
} }
if i - 1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 { if i-1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID
_, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)] _, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)]
if !ok { if !ok {
@ -416,7 +419,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
func (t *TweetResponse) GetCursor() string { func (t *TweetResponse) GetCursor() string {
entries := t.Timeline.Instructions[0].AddEntries.Entries entries := t.Timeline.Instructions[0].AddEntries.Entries
if len(entries) > 0 { if len(entries) > 0 {
last_entry := entries[len(entries) - 1] last_entry := entries[len(entries)-1]
if strings.Contains(last_entry.EntryID, "cursor") { if strings.Contains(last_entry.EntryID, "cursor") {
return last_entry.Content.Operation.Cursor.Value return last_entry.Content.Operation.Cursor.Value
} }
@ -424,7 +427,7 @@ func (t *TweetResponse) GetCursor() string {
// Next, try the other format ("replaceEntry") // Next, try the other format ("replaceEntry")
instructions := t.Timeline.Instructions instructions := t.Timeline.Instructions
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry last_replace_entry := instructions[len(instructions)-1].ReplaceEntry.Entry
if strings.Contains(last_replace_entry.EntryID, "cursor") { if strings.Contains(last_replace_entry.EntryID, "cursor") {
return last_replace_entry.Content.Operation.Cursor.Value return last_replace_entry.Content.Operation.Cursor.Value
} }
@ -450,7 +453,6 @@ func (t *TweetResponse) IsEndOfFeed() bool {
return true return true
} }
func idstr_to_int(idstr string) int64 { func idstr_to_int(idstr string) int64 {
id, err := strconv.Atoi(idstr) id, err := strconv.Atoi(idstr)
if err != nil { if err != nil {

View File

@ -1,9 +1,9 @@
package scraper_test package scraper_test
import ( import (
"testing"
"os"
"encoding/json" "encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -11,17 +11,16 @@ import (
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestNormalizeContent(t *testing.T) { func TestNormalizeContent(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
test_cases := []struct { test_cases := []struct {
filename string filename string
eventual_full_text string eventual_full_text string
quoted_status_id TweetID quoted_status_id TweetID
in_reply_to_id TweetID in_reply_to_id TweetID
retweeted_status_id TweetID retweeted_status_id TweetID
reply_mentions string reply_mentions string
} { }{
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"}, {"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
{"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""}, {"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""},
{"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"}, {"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"},
@ -48,7 +47,7 @@ func TestNormalizeContent(t *testing.T) {
} }
var tweet APITweet var tweet APITweet
err = json.Unmarshal(data, &tweet) err = json.Unmarshal(data, &tweet)
assert.NoError(err, "Failed at " + v.filename) assert.NoError(err, "Failed at "+v.filename)
tweet.NormalizeContent() tweet.NormalizeContent()
@ -60,7 +59,6 @@ func TestNormalizeContent(t *testing.T) {
} }
} }
func TestUserProfileToAPIUser(t *testing.T) { func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json") data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
@ -76,7 +74,6 @@ func TestUserProfileToAPIUser(t *testing.T) {
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount) assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
} }
func TestGetCursor(t *testing.T) { func TestGetCursor(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json") data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
@ -91,13 +88,12 @@ func TestGetCursor(t *testing.T) {
tweet_resp.GetCursor()) tweet_resp.GetCursor())
} }
func TestIsEndOfFeed(t *testing.T) { func TestIsEndOfFeed(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
test_cases := []struct { test_cases := []struct {
filename string filename string
is_end_of_feed bool is_end_of_feed bool
} { }{
{"test_responses/michael_malice_feed.json", false}, {"test_responses/michael_malice_feed.json", false},
{"test_responses/kwiber_end_of_feed.json", true}, {"test_responses/kwiber_end_of_feed.json", true},
} }
@ -113,7 +109,6 @@ func TestIsEndOfFeed(t *testing.T) {
} }
} }
func TestHandleTombstonesHidden(t *testing.T) { func TestHandleTombstonesHidden(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json") data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")

View File

@ -13,13 +13,13 @@ import (
) )
type CardValue struct { type CardValue struct {
Type string `json:"type"` Type string `json:"type"`
StringValue string `json:"string_value"` StringValue string `json:"string_value"`
ImageValue struct { ImageValue struct {
AltText string `json:"alt"` AltText string `json:"alt"`
Height int `json:"height"` Height int `json:"height"`
Width int `json:"width"` Width int `json:"width"`
Url string `json:"url"` Url string `json:"url"`
} `json:"image_value"` } `json:"image_value"`
UserValue struct { UserValue struct {
ID int64 `json:"id_str,string"` ID int64 `json:"id_str,string"`
@ -30,13 +30,14 @@ type CardValue struct {
type APIV2Card struct { type APIV2Card struct {
Legacy struct { Legacy struct {
BindingValues []struct { BindingValues []struct {
Key string `json:"key"` Key string `json:"key"`
Value CardValue `json:"value"` Value CardValue `json:"value"`
} `json:"binding_values"` } `json:"binding_values"`
Name string `json:"name"` Name string `json:"name"`
Url string `json:"url"` Url string `json:"url"`
} `json:"legacy"` } `json:"legacy"`
} }
func (card APIV2Card) ParseAsUrl() Url { func (card APIV2Card) ParseAsUrl() Url {
values := make(map[string]CardValue) values := make(map[string]CardValue)
for _, obj := range card.Legacy.BindingValues { for _, obj := range card.Legacy.BindingValues {
@ -121,6 +122,7 @@ type APIV2UserResult struct {
} `json:"result"` } `json:"result"`
} `json:"user_results"` } `json:"user_results"`
} }
func (u APIV2UserResult) ToUser() User { func (u APIV2UserResult) ToUser() User {
user, err := ParseSingleUser(u.UserResults.Result.Legacy) user, err := ParseSingleUser(u.UserResults.Result.Legacy)
if err != nil { if err != nil {
@ -131,16 +133,16 @@ func (u APIV2UserResult) ToUser() User {
} }
type _Result struct { type _Result struct {
ID int64 `json:"rest_id,string"` ID int64 `json:"rest_id,string"`
Legacy APIV2Tweet `json:"legacy"` Legacy APIV2Tweet `json:"legacy"`
Tombstone *struct { Tombstone *struct {
Text struct { Text struct {
Text string `json:"text"` Text string `json:"text"`
} `json:"text"` } `json:"text"`
} `json:"tombstone"` } `json:"tombstone"`
Core *APIV2UserResult `json:"core"` Core *APIV2UserResult `json:"core"`
Card APIV2Card `json:"card"` Card APIV2Card `json:"card"`
QuotedStatusResult *APIV2Result `json:"quoted_status_result"` QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
} }
type APIV2Result struct { type APIV2Result struct {
@ -149,11 +151,12 @@ type APIV2Result struct {
Tweet _Result `json:"tweet"` Tweet _Result `json:"tweet"`
} `json:"result"` } `json:"result"`
} }
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove { func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
ret := NewTweetTrove() ret := NewTweetTrove()
// Start by checking if this is a null entry in a feed // Start by checking if this is a null entry in a feed
if api_result.Result.Tombstone != nil && ignore_null_entries{ if api_result.Result.Tombstone != nil && ignore_null_entries {
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this? // TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
return ret return ret
} }
@ -221,7 +224,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
continue continue
} }
found = true found = true
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
main_tweet.Urls[i] = url main_tweet.Urls[i] = url
} }
if !found { if !found {
@ -245,6 +248,7 @@ type APIV2Tweet struct {
RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"` RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"`
APITweet APITweet
} }
func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove { func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
ret := NewTweetTrove() ret := NewTweetTrove()
@ -253,7 +257,6 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false) orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
ret.MergeWith(orig_tweet_trove) ret.MergeWith(orig_tweet_trove)
retweet := Retweet{} retweet := Retweet{}
var err error var err error
retweet.RetweetID = TweetID(api_v2_tweet.ID) retweet.RetweetID = TweetID(api_v2_tweet.ID)
@ -277,25 +280,24 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
} }
type APIV2Entry struct { type APIV2Entry struct {
EntryID string `json:"entryId"` EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"` SortIndex int64 `json:"sortIndex,string"`
Content struct { Content struct {
ItemContent struct { ItemContent struct {
EntryType string `json:"entryType"` EntryType string `json:"entryType"`
TweetResults APIV2Result `json:"tweet_results"` TweetResults APIV2Result `json:"tweet_results"`
} `json:"itemContent"` } `json:"itemContent"`
// Cursors // Cursors
EntryType string `json:"entryType"` EntryType string `json:"entryType"`
Value string `json:"value"` Value string `json:"value"`
CursorType string `json:"cursorType"` CursorType string `json:"cursorType"`
} `json:"content"` } `json:"content"`
} }
type APIV2Instruction struct { type APIV2Instruction struct {
Type string `json:"type"` Type string `json:"type"`
Entries []APIV2Entry`json:"entries"` Entries []APIV2Entry `json:"entries"`
} }
type APIV2Response struct { type APIV2Response struct {
@ -324,7 +326,7 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
func (api_response APIV2Response) GetCursorBottom() string { func (api_response APIV2Response) GetCursorBottom() string {
entries := api_response.GetMainInstruction().Entries entries := api_response.GetMainInstruction().Entries
last_entry := entries[len(entries) - 1] last_entry := entries[len(entries)-1]
if last_entry.Content.CursorType != "Bottom" { if last_entry.Content.CursorType != "Bottom" {
panic("No bottom cursor found") panic("No bottom cursor found")
} }
@ -349,7 +351,7 @@ func (api_response APIV2Response) IsEmpty() bool {
*/ */
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove() ret := NewTweetTrove()
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
if !strings.HasPrefix(entry.EntryID, "tweet-") { if !strings.HasPrefix(entry.EntryID, "tweet-") {
continue continue
} }
@ -363,12 +365,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
return ret, nil return ret, nil
} }
func get_graphql_user_timeline_url(user_id UserID, cursor string) string { func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
if cursor != "" { if cursor != "" {
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
} }
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
} }
/** /**
@ -446,7 +447,7 @@ func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Respo
} }
if fresh_response.IsEmpty() { if fresh_response.IsEmpty() {
// Response has a pinned tweet, but no other content: end of feed has been reached // Response has a pinned tweet, but no other content: end of feed has been reached
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
} }
last_response = &fresh_response last_response = &fresh_response

View File

@ -1,10 +1,10 @@
package scraper_test package scraper_test
import ( import (
"testing"
"os"
"encoding/json" "encoding/json"
"fmt" "fmt"
"os"
"testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -34,7 +34,7 @@ func TestAPIV2ParseUser(t *testing.T) {
assert.Equal(user.ID, UserID(44067298)) assert.Equal(user.ID, UserID(44067298))
assert.Equal(user.DisplayName, "Michael Malice") assert.Equal(user.DisplayName, "Michael Malice")
assert.Equal(user.Handle, UserHandle("michaelmalice")) assert.Equal(user.Handle, UserHandle("michaelmalice"))
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & " + assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & "+
"Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model") "Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model")
assert.Equal(user.FollowingCount, 964) assert.Equal(user.FollowingCount, 964)
assert.Equal(user.FollowersCount, 334571) assert.Equal(user.FollowersCount, 334571)
@ -70,7 +70,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
assert.True(ok) assert.True(ok)
assert.Equal(tweet.ID, TweetID(1485708879174508550)) assert.Equal(tweet.ID, TweetID(1485708879174508550))
assert.Equal(tweet.UserID, UserID(44067298)) assert.Equal(tweet.UserID, UserID(44067298))
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a " + assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a "+
"row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964") "row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964")
assert.Equal(tweet.PostedAt.Unix(), int64(1643055574)) assert.Equal(tweet.PostedAt.Unix(), int64(1643055574))
assert.Equal(tweet.QuotedTweetID, TweetID(0)) assert.Equal(tweet.QuotedTweetID, TweetID(0))
@ -133,7 +133,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
assert.True(ok) assert.True(ok)
assert.Equal(TweetID(1485690410899021826), quote_tweet.ID) assert.Equal(TweetID(1485690410899021826), quote_tweet.ID)
assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID) assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID)
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, " + assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, "+
"for example", quote_tweet.Text) "for example", quote_tweet.Text)
// Should be 2 users: quoter and quoted // Should be 2 users: quoter and quoted
@ -182,7 +182,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
// Check the video // Check the video
v := tweet.Videos[0] v := tweet.Videos[0]
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl)
assert.Equal(0, v.ViewCount) // TODO: make this work assert.Equal(0, v.ViewCount) // TODO: make this work
assert.Equal(720, v.Height) assert.Equal(720, v.Height)
assert.Equal(720, v.Width) assert.Equal(720, v.Width)
assert.Equal(30066, v.Duration) assert.Equal(30066, v.Duration)
@ -200,7 +200,6 @@ func TestAPIV2ParseRetweet(t *testing.T) {
assert.Equal(UserID(44067298), retweeting_user.ID) assert.Equal(UserID(44067298), retweeting_user.ID)
assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle) assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle)
// Should be 1 retweet // Should be 1 retweet
assert.Equal(1, len(trove.Retweets)) assert.Equal(1, len(trove.Retweets))
retweet, ok := trove.Retweets[1485699748514476037] retweet, ok := trove.Retweets[1485699748514476037]
@ -270,7 +269,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
assert.Equal(UserID(599817378), retweet.RetweetedByID) assert.Equal(UserID(599817378), retweet.RetweetedByID)
} }
/** /**
* Parse tweet with quoted tombstone * Parse tweet with quoted tombstone
*/ */
@ -300,13 +298,12 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
assert.True(ok) assert.True(ok)
assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID) assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID)
assert.Equal("no longer exists", tombstoned_tweet.TombstoneType) assert.Equal("no longer exists", tombstoned_tweet.TombstoneType)
assert.True (tombstoned_tweet.IsStub) assert.True(tombstoned_tweet.IsStub)
assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle) assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle)
assert.Equal(0, len(trove.Retweets)) assert.Equal(0, len(trove.Retweets))
} }
/** /**
* Parse a tweet with a link * Parse a tweet with a link
*/ */
@ -326,7 +323,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
assert.Equal(1, len(trove.Tweets)) assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1485695695025803264] tweet, ok := trove.Tweets[1485695695025803264]
assert.True(ok) assert.True(ok)
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to " + assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to "+
"show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text) "show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)
assert.Equal(1, len(tweet.Urls)) assert.Equal(1, len(tweet.Urls))
@ -335,7 +332,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
assert.Equal("observer.com", url.Domain) assert.Equal("observer.com", url.Domain)
assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title) assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text) assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. Its a way to demonstrate to " + assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. Its a way to demonstrate to "+
"others that one is a good person without having to do anything", url.Description) "others that one is a good person without having to do anything", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal(600, url.ThumbnailWidth) assert.Equal(600, url.ThumbnailWidth)
@ -439,10 +436,9 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix()) assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix())
assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix()) assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix())
assert.Equal(1440 * 60, poll.VotingDuration) assert.Equal(1440*60, poll.VotingDuration)
} }
func TestParseAPIV2UserFeed(t *testing.T) { func TestParseAPIV2UserFeed(t *testing.T) {
data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json") data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
if err != nil { if err != nil {
@ -495,7 +491,6 @@ func TestParseAPIV2UserFeed(t *testing.T) {
fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets)) fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets))
} }
/** /**
* Should correctly identify an "empty" response * Should correctly identify an "empty" response
*/ */
@ -562,13 +557,12 @@ func TestAPIV2TombstoneEntry(t *testing.T) {
err = json.Unmarshal(data, &tweet_result) err = json.Unmarshal(data, &tweet_result)
require.NoError(t, err) require.NoError(t, err)
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
assert.Len(trove.Tweets, 0) assert.Len(trove.Tweets, 0)
assert.Len(trove.Users, 0) assert.Len(trove.Users, 0)
assert.Len(trove.Retweets, 0) assert.Len(trove.Retweets, 0)
} }
func TestTweetWithWarning(t *testing.T) { func TestTweetWithWarning(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json") data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json")

View File

@ -1,29 +1,29 @@
package scraper package scraper
import ( import (
"path" "path"
) )
type ImageID int64 type ImageID int64
type Image struct { type Image struct {
ID ImageID ID ImageID
TweetID TweetID TweetID TweetID
Width int Width int
Height int Height int
RemoteURL string RemoteURL string
LocalFilename string LocalFilename string
IsDownloaded bool IsDownloaded bool
} }
func ParseAPIMedia(apiMedia APIMedia) Image { func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := path.Base(apiMedia.MediaURLHttps) local_filename := path.Base(apiMedia.MediaURLHttps)
return Image{ return Image{
ID: ImageID(apiMedia.ID), ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps, RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width, Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height, Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename, LocalFilename: local_filename,
IsDownloaded: false, IsDownloaded: false,
} }
} }

View File

@ -1,31 +1,31 @@
package scraper_test package scraper_test
import ( import (
"testing" "encoding/json"
"os" "os"
"encoding/json" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestParseAPIMedia(t *testing.T) { func TestParseAPIMedia(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/image.json") data, err := os.ReadFile("test_responses/tweet_content/image.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apimedia APIMedia var apimedia APIMedia
err = json.Unmarshal(data, &apimedia) err = json.Unmarshal(data, &apimedia)
require.NoError(t, err) require.NoError(t, err)
image := ParseAPIMedia(apimedia) image := ParseAPIMedia(apimedia)
assert.Equal(ImageID(1395882862289772553), image.ID) assert.Equal(ImageID(1395882862289772553), image.ID)
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL) assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
assert.Equal(593, image.Width) assert.Equal(593, image.Width)
assert.Equal(239, image.Height) assert.Equal(239, image.Height)
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename) assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
assert.False(image.IsDownloaded) assert.False(image.IsDownloaded)
} }

View File

@ -2,11 +2,10 @@ package scraper
import ( import (
"fmt" "fmt"
"time"
"net/http" "net/http"
"time"
) )
/** /**
* Return the expanded version of a short URL. Input must be a real short URL. * Return the expanded version of a short URL. Input must be a real short URL.
*/ */
@ -21,7 +20,7 @@ func ExpandShortUrl(short_url string) string {
resp, err := client.Get(short_url) resp, err := client.Get(short_url)
if err != nil { if err != nil {
panic(err) // TODO: handle timeouts panic(err) // TODO: handle timeouts
} }
if resp.StatusCode != 301 { if resp.StatusCode != 301 {
panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR)) panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR))

View File

@ -6,12 +6,11 @@ import (
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestExpandShortUrl(t *testing.T) { func TestExpandShortUrl(t *testing.T) {
redirecting_to := "redirect target" redirecting_to := "redirect target"
srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {

View File

@ -1,82 +1,82 @@
package scraper package scraper
import ( import (
"strings" "net/url"
"strconv" "strconv"
"net/url" "strings"
) )
type PollID int64 type PollID int64
type Poll struct { type Poll struct {
ID PollID ID PollID
TweetID TweetID TweetID TweetID
NumChoices int NumChoices int
Choice1 string Choice1 string
Choice1_Votes int Choice1_Votes int
Choice2 string Choice2 string
Choice2_Votes int Choice2_Votes int
Choice3 string Choice3 string
Choice3_Votes int Choice3_Votes int
Choice4 string Choice4 string
Choice4_Votes int Choice4_Votes int
VotingDuration int // In seconds VotingDuration int // In seconds
VotingEndsAt Timestamp VotingEndsAt Timestamp
LastUpdatedAt Timestamp `db:"last_scraped_at"` LastUpdatedAt Timestamp `db:"last_scraped_at"`
} }
func ParseAPIPoll(apiCard APICard) Poll { func ParseAPIPoll(apiCard APICard) Poll {
card_url, err := url.Parse(apiCard.ShortenedUrl) card_url, err := url.Parse(apiCard.ShortenedUrl)
if err != nil { if err != nil {
panic(err) panic(err)
} }
id := int_or_panic(card_url.Hostname()) id := int_or_panic(card_url.Hostname())
ret := Poll{} ret := Poll{}
ret.ID = PollID(id) ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name) ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60 ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue) ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil { if err != nil {
panic(err) panic(err)
} }
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue) ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil { if err != nil {
panic(err) panic(err)
} }
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue) ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue) ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
if ret.NumChoices > 2 { if ret.NumChoices > 2 {
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue) ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
} }
if ret.NumChoices > 3 { if ret.NumChoices > 3 {
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue) ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
} }
return ret return ret
} }
func parse_num_choices(card_name string) int { func parse_num_choices(card_name string) int {
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 { if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
panic("Not valid card name: " + card_name) panic("Not valid card name: " + card_name)
} }
return int_or_panic(card_name[4:5]) return int_or_panic(card_name[4:5])
} }
func int_or_panic(s string) int { func int_or_panic(s string) int {
result, err := strconv.Atoi(s) result, err := strconv.Atoi(s)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return result return result
} }

View File

@ -1,67 +1,67 @@
package scraper_test package scraper_test
import ( import (
"testing" "encoding/json"
"os" "os"
"encoding/json" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestParsePoll2Choices(t *testing.T) { func TestParsePoll2Choices(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json") data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
poll := ParseAPIPoll(apiCard) poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1457419248461131776), poll.ID) assert.Equal(PollID(1457419248461131776), poll.ID)
assert.Equal(2, poll.NumChoices) assert.Equal(2, poll.NumChoices)
assert.Equal(60 * 60 * 24, poll.VotingDuration) assert.Equal(60*60*24, poll.VotingDuration)
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix()) assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix()) assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Yes", poll.Choice1) assert.Equal("Yes", poll.Choice1)
assert.Equal("No", poll.Choice2) assert.Equal("No", poll.Choice2)
assert.Equal(529, poll.Choice1_Votes) assert.Equal(529, poll.Choice1_Votes)
assert.Equal(2182, poll.Choice2_Votes) assert.Equal(2182, poll.Choice2_Votes)
} }
func TestParsePoll4Choices(t *testing.T) { func TestParsePoll4Choices(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json") data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
poll := ParseAPIPoll(apiCard) poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1455611588854140929), poll.ID) assert.Equal(PollID(1455611588854140929), poll.ID)
assert.Equal(4, poll.NumChoices) assert.Equal(4, poll.NumChoices)
assert.Equal(60 * 60 * 24, poll.VotingDuration) assert.Equal(60*60*24, poll.VotingDuration)
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix()) assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix()) assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix()) assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Alec Baldwin", poll.Choice1) assert.Equal("Alec Baldwin", poll.Choice1)
assert.Equal(1669, poll.Choice1_Votes) assert.Equal(1669, poll.Choice1_Votes)
assert.Equal("Andew Cuomo", poll.Choice2) assert.Equal("Andew Cuomo", poll.Choice2)
assert.Equal(272, poll.Choice2_Votes) assert.Equal(272, poll.Choice2_Votes)
assert.Equal("George Floyd", poll.Choice3) assert.Equal("George Floyd", poll.Choice3)
assert.Equal(829, poll.Choice3_Votes) assert.Equal(829, poll.Choice3_Votes)
assert.Equal("Derek Chauvin", poll.Choice4) assert.Equal("Derek Chauvin", poll.Choice4)
assert.Equal(2397, poll.Choice4_Votes) assert.Equal(2397, poll.Choice4_Votes)
} }

View File

@ -1,12 +1,12 @@
package scraper package scraper
type Retweet struct { type Retweet struct {
RetweetID TweetID RetweetID TweetID
TweetID TweetID TweetID TweetID
Tweet *Tweet Tweet *Tweet
RetweetedByID UserID `db:"retweeted_by"` RetweetedByID UserID `db:"retweeted_by"`
RetweetedBy *User RetweetedBy *User
RetweetedAt Timestamp RetweetedAt Timestamp
} }
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) { func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {

View File

@ -5,8 +5,8 @@ import (
"os" "os"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )

View File

@ -5,7 +5,7 @@ import (
) )
func TimestampToDateString(timestamp int) string { func TimestampToDateString(timestamp int) string {
panic("???") // TODO panic("???") // TODO
} }
/** /**

View File

@ -1,9 +1,9 @@
package scraper package scraper
import ( import (
"time"
"fmt" "fmt"
"strings" "strings"
"time"
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
) )
@ -13,18 +13,18 @@ const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50
type TweetID int64 type TweetID int64
type Tweet struct { type Tweet struct {
ID TweetID ID TweetID
UserID UserID UserID UserID
UserHandle UserHandle // For processing tombstones UserHandle UserHandle // For processing tombstones
User *User User *User
Text string Text string
PostedAt Timestamp PostedAt Timestamp
NumLikes int NumLikes int
NumRetweets int NumRetweets int
NumReplies int NumReplies int
NumQuoteTweets int NumQuoteTweets int
InReplyToID TweetID InReplyToID TweetID
QuotedTweetID TweetID QuotedTweetID TweetID
Images []Image Images []Image
Videos []Video Videos []Video
@ -35,14 +35,13 @@ type Tweet struct {
Polls []Poll Polls []Poll
TombstoneType string TombstoneType string
IsStub bool IsStub bool
IsContentDownloaded bool IsContentDownloaded bool
IsConversationScraped bool IsConversationScraped bool
LastScrapedAt Timestamp LastScrapedAt Timestamp
} }
func (t Tweet) String() string { func (t Tweet) String() string {
var author string var author string
if t.User != nil { if t.User != nil {
@ -52,7 +51,7 @@ func (t Tweet) String() string {
} }
ret := fmt.Sprintf( ret := fmt.Sprintf(
`%s `%s
%s %s
%s %s
Replies: %d RT: %d QT: %d Likes: %d Replies: %d RT: %d QT: %d Likes: %d
@ -67,11 +66,11 @@ Replies: %d RT: %d QT: %d Likes: %d
) )
if len(t.Images) > 0 { if len(t.Images) > 0 {
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN + "images: %d\n" + terminal_utils.COLOR_RESET, len(t.Images)) ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
} }
if len(t.Urls) > 0 { if len(t.Urls) > 0 {
ret += "urls: [\n" ret += "urls: [\n"
for _, url := range(t.Urls) { for _, url := range t.Urls {
ret += " " + url.Text + "\n" ret += " " + url.Text + "\n"
} }
ret += "]" ret += "]"
@ -90,7 +89,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.Text = apiTweet.FullText ret.Text = apiTweet.FullText
// Process "posted-at" date and time // Process "posted-at" date and time
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt) ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil { if err != nil {
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err) return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
@ -125,7 +124,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process images // Process images
for _, media := range apiTweet.Entities.Media { for _, media := range apiTweet.Entities.Media {
if media.Type != "photo" { // TODO: remove this eventually if media.Type != "photo" { // TODO: remove this eventually
panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR)) panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR))
} }
new_image := ParseAPIMedia(media) new_image := ParseAPIMedia(media)
@ -151,7 +150,6 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
} }
} }
// Process videos // Process videos
for _, entity := range apiTweet.ExtendedEntities.Media { for _, entity := range apiTweet.ExtendedEntities.Media {
if entity.Type != "video" && entity.Type != "animated_gif" { if entity.Type != "video" && entity.Type != "animated_gif" {
@ -175,13 +173,12 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process tombstones and other metadata // Process tombstones and other metadata
ret.TombstoneType = apiTweet.TombstoneText ret.TombstoneType = apiTweet.TombstoneText
ret.IsStub = !(ret.TombstoneType == "") ret.IsStub = !(ret.TombstoneType == "")
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
return return
} }
/** /**
* Get a single tweet with no replies from the API. * Get a single tweet with no replies from the API.
* *
@ -206,7 +203,6 @@ func GetTweet(id TweetID) (Tweet, error) {
return ParseSingleTweet(single_tweet) return ParseSingleTweet(single_tweet)
} }
/** /**
* Return a list of tweets, including the original and the rest of its thread, * Return a list of tweets, including the original and the rest of its thread,
* along with a list of associated users. * along with a list of associated users.
@ -227,7 +223,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
return return
} }
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD && if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
tweet_response.GetCursor() != "" { tweet_response.GetCursor() != "" {
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD) err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
if err != nil { if err != nil {
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)

View File

@ -5,13 +5,13 @@ import (
"os" "os"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func load_tweet_from_file(filename string) Tweet{ func load_tweet_from_file(filename string) Tweet {
data, err := os.ReadFile(filename) data, err := os.ReadFile(filename)
if err != nil { if err != nil {
panic(err) panic(err)
@ -28,12 +28,11 @@ func load_tweet_from_file(filename string) Tweet{
return tweet return tweet
} }
func TestParseSingleTweet(t *testing.T) { func TestParseSingleTweet(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the " + assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the "+
"largest white pill Ive swallowed in years.", tweet.Text) "largest white pill Ive swallowed in years.", tweet.Text)
assert.Len(tweet.Mentions, 1) assert.Len(tweet.Mentions, 1)
assert.Contains(tweet.Mentions, UserHandle("michaelmalice")) assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
@ -73,7 +72,7 @@ func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
assert.Equal("This is video hes talking about. Please watch. Is there a single US politician capable of doing this with the " + assert.Equal("This is video hes talking about. Please watch. Is there a single US politician capable of doing this with the "+
"weasels and rats running American industry today?", tweet.Text) "weasels and rats running American industry today?", tweet.Text)
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID) assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
@ -135,7 +134,7 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
assert.False(tweet.Urls[0].HasCard) assert.False(tweet.Urls[0].HasCard)
assert.False(tweet.Urls[1].HasCard) assert.False(tweet.Urls[1].HasCard)
assert.True (tweet.Urls[2].HasCard) assert.True(tweet.Urls[2].HasCard)
assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title) assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title)
} }
@ -166,12 +165,11 @@ func TestTweetWithPoll(t *testing.T) {
assert.Equal(624, p.Choice2_Votes) assert.Equal(624, p.Choice2_Votes)
assert.Equal(778, p.Choice3_Votes) assert.Equal(778, p.Choice3_Votes)
assert.Equal(1138, p.Choice4_Votes) assert.Equal(1138, p.Choice4_Votes)
assert.Equal(1440 * 60, p.VotingDuration) assert.Equal(1440*60, p.VotingDuration)
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix()) assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix()) assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
} }
func TestParseTweetResponse(t *testing.T) { func TestParseTweetResponse(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_feed.json") data, err := os.ReadFile("test_responses/michael_malice_feed.json")
@ -186,7 +184,7 @@ func TestParseTweetResponse(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
tweets, retweets, users := trove.Transform() tweets, retweets, users := trove.Transform()
assert.Len(tweets, 29 - 3) assert.Len(tweets, 29-3)
assert.Len(retweets, 3) assert.Len(retweets, 3)
assert.Len(users, 9) assert.Len(users, 9)
} }

View File

@ -8,9 +8,9 @@ import (
) )
type TweetTrove struct { type TweetTrove struct {
Tweets map[TweetID]Tweet Tweets map[TweetID]Tweet
Users map[UserID]User Users map[UserID]User
Retweets map[TweetID]Retweet Retweets map[TweetID]Retweet
TombstoneUsers []UserHandle TombstoneUsers []UserHandle
} }
@ -38,7 +38,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [
retweets = append(retweets, val) retweets = append(retweets, val)
} }
return return
} // TODO: refactor until this function isn't needed anymore } // TODO: refactor until this function isn't needed anymore
/** /**
* Search for a user by handle. Second param is whether the user was found or not. * Search for a user by handle. Second param is whether the user was found or not.

View File

@ -2,28 +2,28 @@ package scraper
import ( import (
"fmt" "fmt"
"net/url"
"path" "path"
"regexp" "regexp"
"net/url"
) )
type Url struct { type Url struct {
TweetID TweetID TweetID TweetID
Domain string Domain string
Text string Text string
ShortText string ShortText string
Title string Title string
Description string Description string
ThumbnailWidth int ThumbnailWidth int
ThumbnailHeight int ThumbnailHeight int
ThumbnailRemoteUrl string ThumbnailRemoteUrl string
ThumbnailLocalPath string ThumbnailLocalPath string
CreatorID UserID CreatorID UserID
SiteID UserID SiteID UserID
HasCard bool HasCard bool
HasThumbnail bool HasThumbnail bool
IsContentDownloaded bool IsContentDownloaded bool
} }
@ -86,7 +86,7 @@ func TryParseTweetUrl(url string) (UserHandle, TweetID, bool) {
if matches == nil { if matches == nil {
return UserHandle(""), TweetID(0), false return UserHandle(""), TweetID(0), false
} }
if len(matches) != 3 { // matches[0] is the full string if len(matches) != 3 { // matches[0] is the full string
panic(matches) panic(matches)
} }
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true

View File

@ -1,153 +1,153 @@
package scraper_test package scraper_test
import ( import (
"testing" "encoding/json"
"os" "os"
"encoding/json" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestParseAPIUrlCard(t *testing.T) { func TestParseAPIUrlCard(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card.json") data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
url := ParseAPIUrlCard(apiCard) url := ParseAPIUrlCard(apiCard)
assert.Equal("reason.com", url.Domain) assert.Equal("reason.com", url.Domain)
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title) assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned " + assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
"resilience.\"", url.Description) "resilience.\"", url.Description)
assert.Equal(600, url.ThumbnailWidth) assert.Equal(600, url.ThumbnailWidth)
assert.Equal(315, url.ThumbnailHeight) assert.Equal(315, url.ThumbnailHeight)
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath) assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(155581583), url.CreatorID) assert.Equal(UserID(155581583), url.CreatorID)
assert.Equal(UserID(16467567), url.SiteID) assert.Equal(UserID(16467567), url.SiteID)
assert.True(url.HasThumbnail) assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded) assert.False(url.IsContentDownloaded)
} }
func TestParseAPIUrlCardWithPlayer(t *testing.T) { func TestParseAPIUrlCardWithPlayer(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json") data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
url := ParseAPIUrlCard(apiCard) url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain) assert.Equal("www.youtube.com", url.Domain)
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title) assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8" + assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
"Watch this episode on Rumble: https://rumble...", url.Description) "Watch this episode on Rumble: https://rumble...", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath) assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID) assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail) assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded) assert.False(url.IsContentDownloaded)
} }
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) { func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json") data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
url := ParseAPIUrlCard(apiCard) url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain) assert.Equal("www.youtube.com", url.Domain)
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title) assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________" + assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
"__________________________________________...", url.Description) "__________________________________________...", url.Description)
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath) assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID) assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail) assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded) assert.False(url.IsContentDownloaded)
} }
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) { func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json") data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apiCard APICard var apiCard APICard
err = json.Unmarshal(data, &apiCard) err = json.Unmarshal(data, &apiCard)
require.NoError(t, err) require.NoError(t, err)
url := ParseAPIUrlCard(apiCard) url := ParseAPIUrlCard(apiCard)
assert.Equal("en.m.wikipedia.org", url.Domain) assert.Equal("en.m.wikipedia.org", url.Domain)
assert.Equal("Entryism - Wikipedia", url.Title) assert.Equal("Entryism - Wikipedia", url.Title)
assert.Equal("", url.Description) assert.Equal("", url.Description)
assert.True(url.HasCard) assert.True(url.HasCard)
assert.False(url.HasThumbnail) assert.False(url.HasThumbnail)
} }
/** /**
* Should check if a url is a tweet url, and if so, parse it * Should check if a url is a tweet url, and if so, parse it
*/ */
func TestParseTweetUrl(t *testing.T) { func TestParseTweetUrl(t *testing.T) {
assert:= assert.New(t) assert := assert.New(t)
// Test valid tweet url // Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730" url := "https://twitter.com/kanesays23/status/1429583672827465730"
handle, id, is_ok := TryParseTweetUrl(url) handle, id, is_ok := TryParseTweetUrl(url)
assert.True(is_ok) assert.True(is_ok)
assert.Equal(UserHandle("kanesays23"), handle) assert.Equal(UserHandle("kanesays23"), handle)
assert.Equal(TweetID(1429583672827465730), id) assert.Equal(TweetID(1429583672827465730), id)
// Test url with GET params // Test url with GET params
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.True(is_ok) assert.True(is_ok)
assert.Equal(UserHandle("NerdNoticing"), handle) assert.Equal(UserHandle("NerdNoticing"), handle)
assert.Equal(TweetID(1263192389050654720), id) assert.Equal(TweetID(1263192389050654720), id)
// Test invalid url // Test invalid url
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") _, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.False(is_ok) assert.False(is_ok)
// Test empty string // Test empty string
_, _, is_ok = TryParseTweetUrl("") _, _, is_ok = TryParseTweetUrl("")
assert.False(is_ok) assert.False(is_ok)
} }
/** /**
* Should extract a user handle from a tweet URL, or fail if URL is invalid * Should extract a user handle from a tweet URL, or fail if URL is invalid
*/ */
func TestParseHandleFromTweetUrl(t *testing.T) { func TestParseHandleFromTweetUrl(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
// Test valid tweet url // Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730" url := "https://twitter.com/kanesays23/status/1429583672827465730"
result, err := ParseHandleFromTweetUrl(url) result, err := ParseHandleFromTweetUrl(url)
assert.NoError(err) assert.NoError(err)
assert.Equal(UserHandle("kanesays23"), result) assert.Equal(UserHandle("kanesays23"), result)
// Test url with GET params // Test url with GET params
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.NoError(err) assert.NoError(err)
assert.Equal(UserHandle("NerdNoticing"), result) assert.Equal(UserHandle("NerdNoticing"), result)
// Test invalid url // Test invalid url
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") _, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.Error(err) assert.Error(err)
// Test empty string // Test empty string
_, err = ParseHandleFromTweetUrl("") _, err = ParseHandleFromTweetUrl("")
assert.Error(err) assert.Error(err)
} }

View File

@ -1,12 +1,12 @@
package scraper package scraper
import ( import (
"fmt" "fmt"
"strings" "path"
"regexp" "regexp"
"path" "strings"
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
) )
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png" const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
@ -15,47 +15,48 @@ type UserID int64
type UserHandle string type UserHandle string
func JoinArrayOfHandles(handles []UserHandle) string { func JoinArrayOfHandles(handles []UserHandle) string {
ret := []string{} ret := []string{}
for _, h := range handles { for _, h := range handles {
ret = append(ret, string(h)) ret = append(ret, string(h))
} }
return strings.Join(ret, ",") return strings.Join(ret, ",")
} }
type User struct { type User struct {
ID UserID ID UserID
DisplayName string DisplayName string
Handle UserHandle Handle UserHandle
Bio string Bio string
FollowingCount int FollowingCount int
FollowersCount int FollowersCount int
Location string Location string
Website string Website string
JoinDate Timestamp JoinDate Timestamp
IsPrivate bool IsPrivate bool
IsVerified bool IsVerified bool
IsBanned bool IsBanned bool
ProfileImageUrl string IsDeleted bool
ProfileImageLocalPath string ProfileImageUrl string
BannerImageUrl string ProfileImageLocalPath string
BannerImageLocalPath string BannerImageUrl string
BannerImageLocalPath string
PinnedTweetID TweetID PinnedTweetID TweetID
PinnedTweet *Tweet PinnedTweet *Tweet
IsFollowed bool IsFollowed bool
IsContentDownloaded bool IsContentDownloaded bool
IsNeedingFakeID bool IsNeedingFakeID bool
IsIdFake bool IsIdFake bool
} }
func (u User) String() string { func (u User) String() string {
var verified string var verified string
if u.IsVerified { if u.IsVerified {
verified = "[\u2713]" verified = "[\u2713]"
} }
ret := fmt.Sprintf( ret := fmt.Sprintf(
`%s%s `%s%s
@%s @%s
%s %s
@ -65,115 +66,112 @@ Joined %s
%s %s
%s %s
`, `,
u.DisplayName, u.DisplayName,
verified, verified,
u.Handle, u.Handle,
terminal_utils.WrapText(u.Bio, 60), terminal_utils.WrapText(u.Bio, 60),
u.FollowingCount, u.FollowingCount,
u.FollowersCount, u.FollowersCount,
terminal_utils.FormatDate(u.JoinDate.Time), terminal_utils.FormatDate(u.JoinDate.Time),
u.Location, u.Location,
u.Website, u.Website,
) )
if u.PinnedTweet != nil { if u.PinnedTweet != nil {
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60) ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
} else { } else {
println("Pinned tweet id:", u.PinnedTweetID) println("Pinned tweet id:", u.PinnedTweetID)
} }
return ret return ret
} }
/** /**
* Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user` * Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user`
* subcommand or as part of tombstone user fetching.) * subcommand or as part of tombstone user fetching.)
*/ */
func GetUnknownUserWithHandle(handle UserHandle) User { func GetUnknownUserWithHandle(handle UserHandle) User {
return User{ return User{
ID: UserID(0), // 2^62 + 1... ID: UserID(0), // 2^62 + 1...
DisplayName: string(handle), DisplayName: string(handle),
Handle: handle, Handle: handle,
Bio: "<blank>", Bio: "<blank>",
FollowersCount: 0, FollowersCount: 0,
FollowingCount: 0, FollowingCount: 0,
Location: "<blank>", Location: "<blank>",
Website:"<blank>", Website: "<blank>",
JoinDate: TimestampFromUnix(0), JoinDate: TimestampFromUnix(0),
IsVerified: false, IsVerified: false,
IsPrivate: false, IsPrivate: false,
IsNeedingFakeID: true, IsNeedingFakeID: true,
IsIdFake: true, IsIdFake: true,
} }
} }
// Turn an APIUser, as returned from the scraper, into a properly structured User object // Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) { func ParseSingleUser(apiUser APIUser) (ret User, err error) {
if apiUser.DoesntExist { if apiUser.DoesntExist {
// User may have been deleted, or there was a typo. There's no data to parse // User may have been deleted, or there was a typo. There's no data to parse
if apiUser.ScreenName == "" { if apiUser.ScreenName == "" {
panic("ScreenName is empty!") panic("ScreenName is empty!")
} }
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName)) ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
return return
} }
ret.ID = UserID(apiUser.ID) ret.ID = UserID(apiUser.ID)
ret.Handle = UserHandle(apiUser.ScreenName) ret.Handle = UserHandle(apiUser.ScreenName)
if apiUser.IsBanned { if apiUser.IsBanned {
// Banned users won't have any further info, so just return here // Banned users won't have any further info, so just return here
ret.IsBanned = true ret.IsBanned = true
return return
} }
ret.DisplayName = apiUser.Name ret.DisplayName = apiUser.Name
ret.Bio = apiUser.Description ret.Bio = apiUser.Description
ret.FollowingCount = apiUser.FriendsCount ret.FollowingCount = apiUser.FriendsCount
ret.FollowersCount = apiUser.FollowersCount ret.FollowersCount = apiUser.FollowersCount
ret.Location = apiUser.Location ret.Location = apiUser.Location
if len(apiUser.Entities.URL.Urls) > 0 { if len(apiUser.Entities.URL.Urls) > 0 {
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
} }
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt) ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
if err != nil { if err != nil {
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err) err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
return return
} }
ret.IsPrivate = apiUser.Protected ret.IsPrivate = apiUser.Protected
ret.IsVerified = apiUser.Verified ret.IsVerified = apiUser.Verified
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
}
ret.BannerImageUrl = apiUser.ProfileBannerURL
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) { ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".") ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
}
ret.BannerImageUrl = apiUser.ProfileBannerURL
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path() if len(apiUser.PinnedTweetIdsStr) > 0 {
ret.BannerImageLocalPath = ret.compute_banner_image_local_path() ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
}
if len(apiUser.PinnedTweetIdsStr) > 0 { return
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
}
return
} }
// Calls API#GetUser and returns the parsed result // Calls API#GetUser and returns the parsed result
func GetUser(handle UserHandle) (User, error) { func GetUser(handle UserHandle) (User, error) {
api := API{} api := API{}
apiUser, err := api.GetUser(handle) apiUser, err := api.GetUser(handle)
if apiUser.ScreenName == "" { if apiUser.ScreenName == "" {
apiUser.ScreenName = string(handle) apiUser.ScreenName = string(handle)
} }
if err != nil { if err != nil {
return User{}, err return User{}, err
} }
return ParseSingleUser(apiUser) return ParseSingleUser(apiUser)
} }
/** /**
* Make a filename for the profile image, that hopefully won't clobber other ones * Make a filename for the profile image, that hopefully won't clobber other ones
*/ */
func (u User) compute_profile_image_local_path() string { func (u User) compute_profile_image_local_path() string {
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl) return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
} }
/** /**
@ -182,34 +180,34 @@ func (u User) compute_profile_image_local_path() string {
* If there is no banner image, just return nothing. * If there is no banner image, just return nothing.
*/ */
func (u User) compute_banner_image_local_path() string { func (u User) compute_banner_image_local_path() string {
if u.BannerImageUrl == "" { if u.BannerImageUrl == "" {
return "" return ""
} }
base_name := path.Base(u.BannerImageUrl) base_name := path.Base(u.BannerImageUrl)
// Check if it has an extension (e.g., ".png" or ".jpeg") // Check if it has an extension (e.g., ".png" or ".jpeg")
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) { if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
// If it doesn't have an extension, add one // If it doesn't have an extension, add one
base_name += ".jpg" base_name += ".jpg"
} }
return string(u.Handle) + "_banner_" + base_name return string(u.Handle) + "_banner_" + base_name
} }
/** /**
* Get the URL where we would expect to find a User's tiny profile image * Get the URL where we would expect to find a User's tiny profile image
*/ */
func (u User) GetTinyProfileImageUrl() string { func (u User) GetTinyProfileImageUrl() string {
// If profile image is empty, then just use the default profile image // If profile image is empty, then just use the default profile image
if u.ProfileImageUrl == "" { if u.ProfileImageUrl == "" {
return DEFAULT_PROFILE_IMAGE_URL return DEFAULT_PROFILE_IMAGE_URL
} }
// Check that the format is as expected // Check that the format is as expected
r := regexp.MustCompile(`(\.\w{2,4})$`) r := regexp.MustCompile(`(\.\w{2,4})$`)
if !r.MatchString(u.ProfileImageUrl) { if !r.MatchString(u.ProfileImageUrl) {
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl)) panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
} }
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1") return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
} }
/** /**
@ -217,8 +215,8 @@ func (u User) GetTinyProfileImageUrl() string {
* If user has a blank or default profile image, return a non-personalized default path. * If user has a blank or default profile image, return a non-personalized default path.
*/ */
func (u User) GetTinyProfileImageLocalPath() string { func (u User) GetTinyProfileImageLocalPath() string {
if u.ProfileImageUrl == "" { if u.ProfileImageUrl == "" {
return path.Base(u.GetTinyProfileImageUrl()) return path.Base(u.GetTinyProfileImageUrl())
} }
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl()) return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
} }

View File

@ -1,8 +1,8 @@
package scraper package scraper
import ( import (
"fmt"
"errors" "errors"
"fmt"
) )
/** /**
@ -33,7 +33,6 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
return ParseTweetResponse(tweet_response) return ParseTweetResponse(tweet_response)
} }
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
api := API{} api := API{}
api_response, err := api.GetGraphqlFeedFor(user_id, "") api_response, err := api.GetGraphqlFeedFor(user_id, "")

View File

@ -1,14 +1,14 @@
package scraper_test package scraper_test
import ( import (
"testing"
"encoding/json" "encoding/json"
"os"
"net/http" "net/http"
"os"
"testing"
"github.com/jarcoal/httpmock" "github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
@ -31,7 +31,7 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal(UserID(44067298), user.ID) assert.Equal(UserID(44067298), user.ID)
assert.Equal("Michael Malice", user.DisplayName) assert.Equal("Michael Malice", user.DisplayName)
assert.Equal(UserHandle("michaelmalice"), user.Handle) assert.Equal(UserHandle("michaelmalice"), user.Handle)
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by " + assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio) "Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
assert.Equal(941, user.FollowingCount) assert.Equal(941, user.FollowingCount)
assert.Equal(208589, user.FollowersCount) assert.Equal(208589, user.FollowersCount)
@ -39,7 +39,7 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal("https://amzn.to/3oInafv", user.Website) assert.Equal("https://amzn.to/3oInafv", user.Website)
assert.Equal(int64(1243920952), user.JoinDate.Unix()) assert.Equal(int64(1243920952), user.JoinDate.Unix())
assert.False(user.IsPrivate) assert.False(user.IsPrivate)
assert.True (user.IsVerified) assert.True(user.IsVerified)
assert.False(user.IsBanned) assert.False(user.IsBanned)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl) assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl()) assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
@ -90,7 +90,7 @@ func TestParseDeletedUser(t *testing.T) {
handle := "Some Random Deleted User" handle := "Some Random Deleted User"
apiUser := user_resp.ConvertToAPIUser() apiUser := user_resp.ConvertToAPIUser()
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
user, err := ParseSingleUser(apiUser) user, err := ParseSingleUser(apiUser)
require.NoError(t, err) require.NoError(t, err)

View File

@ -1,9 +1,9 @@
package scraper package scraper
import ( import (
"fmt" "fmt"
"sort" "path"
"path" "sort"
) )
// VideoID is the numeric identifier stored in Video.ID.
type VideoID int64
@ -12,61 +12,61 @@ type VideoID int64
// from someone else). // from someone else).
type Video struct { type Video struct {
ID VideoID ID VideoID
TweetID TweetID TweetID TweetID
Width int Width int
Height int Height int
RemoteURL string RemoteURL string
LocalFilename string LocalFilename string
ThumbnailRemoteUrl string ThumbnailRemoteUrl string
ThumbnailLocalPath string `db:"thumbnail_local_filename"` ThumbnailLocalPath string `db:"thumbnail_local_filename"`
Duration int // milliseconds Duration int // milliseconds
ViewCount int ViewCount int
IsDownloaded bool IsDownloaded bool
IsGif bool IsGif bool
} }
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video { func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
variants := apiVideo.VideoInfo.Variants variants := apiVideo.VideoInfo.Variants
sort.Sort(variants) sort.Sort(variants)
var view_count int var view_count int
r := apiVideo.Ext.MediaStats.R r := apiVideo.Ext.MediaStats.R
switch r.(type) { switch r.(type) {
case string: case string:
view_count = 0 view_count = 0
case map[string]interface{}: case map[string]interface{}:
OK_entry, ok := r.(map[string]interface{})["ok"] OK_entry, ok := r.(map[string]interface{})["ok"]
if !ok { if !ok {
panic("No 'ok' value found in the R!") panic("No 'ok' value found in the R!")
} }
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"] view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
view_count = int_or_panic(view_count_str.(string)) view_count = int_or_panic(view_count_str.(string))
if !ok { if !ok {
panic("No 'viewCount' value found in the OK!") panic("No 'viewCount' value found in the OK!")
} }
} }
local_filename := fmt.Sprintf("%d.mp4", tweet_id) local_filename := fmt.Sprintf("%d.mp4", tweet_id)
return Video{ return Video{
ID: VideoID(apiVideo.ID), ID: VideoID(apiVideo.ID),
TweetID: tweet_id, TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width, Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height, Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL, RemoteURL: variants[0].URL,
LocalFilename: local_filename, LocalFilename: local_filename,
ThumbnailRemoteUrl: apiVideo.MediaURLHttps, ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps), ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
Duration: apiVideo.VideoInfo.Duration, Duration: apiVideo.VideoInfo.Duration,
ViewCount: view_count, ViewCount: view_count,
IsDownloaded: false, IsDownloaded: false,
IsGif: apiVideo.Type == "animated_gif", IsGif: apiVideo.Type == "animated_gif",
} }
} }

View File

@ -1,37 +1,37 @@
package scraper_test package scraper_test
import ( import (
"testing" "encoding/json"
"os" "os"
"encoding/json" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
func TestParseAPIVideo(t *testing.T) { func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video.json") data, err := os.ReadFile("test_responses/tweet_content/video.json")
if err != nil { if err != nil {
panic(err) panic(err)
} }
var apivideo APIExtendedMedia var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo) err = json.Unmarshal(data, &apivideo)
require.NoError(t, err) require.NoError(t, err)
tweet_id := TweetID(28) tweet_id := TweetID(28)
video := ParseAPIVideo(apivideo, tweet_id) video := ParseAPIVideo(apivideo, tweet_id)
assert.Equal(VideoID(1418951950020845568), video.ID) assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(tweet_id, video.TweetID) assert.Equal(tweet_id, video.TweetID)
assert.Equal(1280, video.Height) assert.Equal(1280, video.Height)
assert.Equal(720, video.Width) assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL) assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("28.mp4", video.LocalFilename) assert.Equal("28.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl) assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath) assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount) assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration) assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded) assert.False(video.IsDownloaded)
} }