Add 'gofmt' linter
This commit is contained in:
parent
223734d001
commit
d1d80a91cd
@ -27,6 +27,7 @@ linters:
|
|||||||
- wrapcheck
|
- wrapcheck
|
||||||
- lll
|
- lll
|
||||||
- godox
|
- godox
|
||||||
|
- gofmt
|
||||||
- errorlint
|
- errorlint
|
||||||
- nolintlint
|
- nolintlint
|
||||||
|
|
||||||
@ -203,9 +204,9 @@ linters-settings:
|
|||||||
keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting
|
keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting
|
||||||
- XXX
|
- XXX
|
||||||
|
|
||||||
# gofmt:
|
gofmt:
|
||||||
# # simplify code: gofmt with `-s` option, true by default
|
# simplify code: gofmt with `-s` option, true by default
|
||||||
# simplify: true
|
simplify: true
|
||||||
|
|
||||||
# gofumpt:
|
# gofumpt:
|
||||||
# # Select the Go version to target. The default is `1.15`.
|
# # Select the Go version to target. The default is `1.15`.
|
||||||
|
@ -6,9 +6,9 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
|
||||||
|
"github.com/go-test/deep"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"github.com/go-test/deep"
|
|
||||||
|
|
||||||
"offline_twitter/scraper"
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -76,7 +76,7 @@ func TestModifyUser(t *testing.T) {
|
|||||||
fake_user.FollowersCount = 2000
|
fake_user.FollowersCount = 2000
|
||||||
fake_user.JoinDate = scraper.TimestampFromUnix(2000)
|
fake_user.JoinDate = scraper.TimestampFromUnix(2000)
|
||||||
fake_user.ProfileImageUrl = "asdf2"
|
fake_user.ProfileImageUrl = "asdf2"
|
||||||
fake_user.IsContentDownloaded = false // test No Worsening
|
fake_user.IsContentDownloaded = false // test No Worsening
|
||||||
|
|
||||||
// Save the modified user
|
// Save the modified user
|
||||||
err = profile.SaveUser(&fake_user)
|
err = profile.SaveUser(&fake_user)
|
||||||
|
@ -3,9 +3,9 @@ package persistence
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"os"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var NotInDatabase = errors.New("Not in database")
|
var NotInDatabase = errors.New("Not in database")
|
||||||
@ -35,7 +35,7 @@ func file_exists(path string) bool {
|
|||||||
* https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250
|
* https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250
|
||||||
*/
|
*/
|
||||||
func ToSnakeCase(str string) string {
|
func ToSnakeCase(str string) string {
|
||||||
snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}")
|
snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}")
|
||||||
snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}")
|
snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}")
|
||||||
return strings.ToLower(snake)
|
return strings.ToLower(snake)
|
||||||
}
|
}
|
||||||
|
@ -70,13 +70,13 @@ func create_stable_user() scraper.User {
|
|||||||
func create_image_from_id(id int) scraper.Image {
|
func create_image_from_id(id int) scraper.Image {
|
||||||
filename := fmt.Sprintf("image%d.jpg", id)
|
filename := fmt.Sprintf("image%d.jpg", id)
|
||||||
return scraper.Image{
|
return scraper.Image{
|
||||||
ID: scraper.ImageID(id),
|
ID: scraper.ImageID(id),
|
||||||
TweetID: -1,
|
TweetID: -1,
|
||||||
Width: id * 10,
|
Width: id * 10,
|
||||||
Height: id * 5,
|
Height: id * 5,
|
||||||
RemoteURL: filename,
|
RemoteURL: filename,
|
||||||
LocalFilename: filename,
|
LocalFilename: filename,
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,18 +86,18 @@ func create_image_from_id(id int) scraper.Image {
|
|||||||
func create_video_from_id(id int) scraper.Video {
|
func create_video_from_id(id int) scraper.Video {
|
||||||
filename := fmt.Sprintf("video%d.jpg", id)
|
filename := fmt.Sprintf("video%d.jpg", id)
|
||||||
return scraper.Video{
|
return scraper.Video{
|
||||||
ID: scraper.VideoID(id),
|
ID: scraper.VideoID(id),
|
||||||
TweetID: -1,
|
TweetID: -1,
|
||||||
Width: id * 10,
|
Width: id * 10,
|
||||||
Height: id * 5,
|
Height: id * 5,
|
||||||
RemoteURL: filename,
|
RemoteURL: filename,
|
||||||
LocalFilename: filename,
|
LocalFilename: filename,
|
||||||
ThumbnailRemoteUrl: filename,
|
ThumbnailRemoteUrl: filename,
|
||||||
ThumbnailLocalPath: filename,
|
ThumbnailLocalPath: filename,
|
||||||
Duration: 10000,
|
Duration: 10000,
|
||||||
ViewCount: 200,
|
ViewCount: 200,
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
IsGif: false,
|
IsGif: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,13 +265,13 @@ func create_dummy_tombstone() scraper.Tweet {
|
|||||||
tweet_id := scraper.TweetID(rand.Int())
|
tweet_id := scraper.TweetID(rand.Int())
|
||||||
|
|
||||||
return scraper.Tweet{
|
return scraper.Tweet{
|
||||||
ID: tweet_id,
|
ID: tweet_id,
|
||||||
UserID: -1,
|
UserID: -1,
|
||||||
TombstoneType: "deleted",
|
TombstoneType: "deleted",
|
||||||
IsStub: true,
|
IsStub: true,
|
||||||
Mentions: []scraper.UserHandle{},
|
Mentions: []scraper.UserHandle{},
|
||||||
ReplyMentions: []scraper.UserHandle{},
|
ReplyMentions: []scraper.UserHandle{},
|
||||||
Hashtags: []string{},
|
Hashtags: []string{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,29 +7,27 @@ import (
|
|||||||
"offline_twitter/terminal_utils"
|
"offline_twitter/terminal_utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
const ENGINE_DATABASE_VERSION = 11
|
const ENGINE_DATABASE_VERSION = 11
|
||||||
|
|
||||||
|
|
||||||
type VersionMismatchError struct {
|
type VersionMismatchError struct {
|
||||||
EngineVersion int
|
EngineVersion int
|
||||||
DatabaseVersion int
|
DatabaseVersion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e VersionMismatchError) Error() string {
|
func (e VersionMismatchError) Error() string {
|
||||||
return fmt.Sprintf(
|
return fmt.Sprintf(
|
||||||
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
|
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
|
||||||
Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`,
|
Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`,
|
||||||
e.DatabaseVersion, e.EngineVersion,
|
e.DatabaseVersion, e.EngineVersion,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Nth entry is the migration that moves you from version N to version N+1.
|
* The Nth entry is the migration that moves you from version N to version N+1.
|
||||||
* `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`.
|
* `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`.
|
||||||
*/
|
*/
|
||||||
var MIGRATIONS = []string{
|
var MIGRATIONS = []string{
|
||||||
`create table polls (rowid integer primary key,
|
`create table polls (rowid integer primary key,
|
||||||
id integer unique not null check(typeof(id) = 'integer'),
|
id integer unique not null check(typeof(id) = 'integer'),
|
||||||
tweet_id integer not null,
|
tweet_id integer not null,
|
||||||
num_choices integer not null,
|
num_choices integer not null,
|
||||||
@ -50,25 +48,25 @@ var MIGRATIONS = []string{
|
|||||||
|
|
||||||
foreign key(tweet_id) references tweets(id)
|
foreign key(tweet_id) references tweets(id)
|
||||||
);`,
|
);`,
|
||||||
`alter table tweets add column is_conversation_scraped boolean default 0;
|
`alter table tweets add column is_conversation_scraped boolean default 0;
|
||||||
alter table tweets add column last_scraped_at integer not null default 0`,
|
alter table tweets add column last_scraped_at integer not null default 0`,
|
||||||
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
|
||||||
insert into tombstone_types (rowid, short_name, tombstone_text)
|
insert into tombstone_types (rowid, short_name, tombstone_text)
|
||||||
values (5, 'violated', 'This Tweet violated the Twitter Rules'),
|
values (5, 'violated', 'This Tweet violated the Twitter Rules'),
|
||||||
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
|
||||||
`alter table videos add column thumbnail_remote_url text not null default "missing";
|
`alter table videos add column thumbnail_remote_url text not null default "missing";
|
||||||
alter table videos add column thumbnail_local_filename text not null default "missing"`,
|
alter table videos add column thumbnail_local_filename text not null default "missing"`,
|
||||||
`alter table videos add column duration integer not null default 0;
|
`alter table videos add column duration integer not null default 0;
|
||||||
alter table videos add column view_count integer not null default 0`,
|
alter table videos add column view_count integer not null default 0`,
|
||||||
`alter table users add column is_banned boolean default 0`,
|
`alter table users add column is_banned boolean default 0`,
|
||||||
`alter table urls add column short_text text not null default ""`,
|
`alter table urls add column short_text text not null default ""`,
|
||||||
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|
||||||
|| 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
|| 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
|
||||||
`alter table users add column is_followed boolean default 0`,
|
`alter table users add column is_followed boolean default 0`,
|
||||||
`create table fake_user_sequence(latest_fake_id integer not null);
|
`create table fake_user_sequence(latest_fake_id integer not null);
|
||||||
insert into fake_user_sequence values(0x4000000000000000);
|
insert into fake_user_sequence values(0x4000000000000000);
|
||||||
alter table users add column is_id_fake boolean default 0;`,
|
alter table users add column is_id_fake boolean default 0;`,
|
||||||
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
|
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
|
||||||
'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`,
|
'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,12 +2,13 @@ package persistence_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"offline_twitter/scraper"
|
|
||||||
"offline_twitter/persistence"
|
"offline_twitter/persistence"
|
||||||
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestVersionUpgrade(t *testing.T) {
|
func TestVersionUpgrade(t *testing.T) {
|
||||||
@ -25,7 +26,7 @@ func TestVersionUpgrade(t *testing.T) {
|
|||||||
require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
|
require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
|
||||||
|
|
||||||
persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration)
|
persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration)
|
||||||
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION + 1)
|
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION+1)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't")
|
require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't")
|
||||||
|
@ -5,8 +5,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
END_OF_FEED = fmt.Errorf("End of feed")
|
END_OF_FEED = fmt.Errorf("End of feed")
|
||||||
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
|
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
|
||||||
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
|
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
|
||||||
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
|
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
|
||||||
)
|
)
|
||||||
|
@ -1,33 +1,33 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"html"
|
"html"
|
||||||
"time"
|
|
||||||
"strings"
|
|
||||||
"encoding/json"
|
|
||||||
"strconv"
|
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
type APIMedia struct {
|
type APIMedia struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
MediaURLHttps string `json:"media_url_https"`
|
MediaURLHttps string `json:"media_url_https"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
OriginalInfo struct {
|
OriginalInfo struct {
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
Height int `json:"height"`
|
Height int `json:"height"`
|
||||||
} `json:"original_info"`
|
} `json:"original_info"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type SortableVariants []struct {
|
type SortableVariants []struct {
|
||||||
Bitrate int `json:"bitrate,omitempty"`
|
Bitrate int `json:"bitrate,omitempty"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
}
|
}
|
||||||
func (v SortableVariants) Len() int { return len(v) }
|
|
||||||
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
func (v SortableVariants) Len() int { return len(v) }
|
||||||
|
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
|
||||||
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
|
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
|
||||||
|
|
||||||
type APIExtendedMedia struct {
|
type APIExtendedMedia struct {
|
||||||
@ -35,12 +35,12 @@ type APIExtendedMedia struct {
|
|||||||
MediaURLHttps string `json:"media_url_https"`
|
MediaURLHttps string `json:"media_url_https"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
VideoInfo struct {
|
VideoInfo struct {
|
||||||
Variants SortableVariants `json:"variants"`
|
Variants SortableVariants `json:"variants"`
|
||||||
Duration int `json:"duration_millis"`
|
Duration int `json:"duration_millis"`
|
||||||
} `json:"video_info"`
|
} `json:"video_info"`
|
||||||
OriginalInfo struct {
|
OriginalInfo struct {
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
Height int `json:"height"`
|
Height int `json:"height"`
|
||||||
} `json:"original_info"`
|
} `json:"original_info"`
|
||||||
Ext struct {
|
Ext struct {
|
||||||
MediaStats struct {
|
MediaStats struct {
|
||||||
@ -74,9 +74,9 @@ type APICard struct {
|
|||||||
} `json:"description"`
|
} `json:"description"`
|
||||||
Thumbnail struct {
|
Thumbnail struct {
|
||||||
ImageValue struct {
|
ImageValue struct {
|
||||||
Url string `json:"url"`
|
Url string `json:"url"`
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
Height int `json:"height"`
|
Height int `json:"height"`
|
||||||
} `json:"image_value"`
|
} `json:"image_value"`
|
||||||
} `json:"thumbnail_image_large"`
|
} `json:"thumbnail_image_large"`
|
||||||
PlayerImage struct {
|
PlayerImage struct {
|
||||||
@ -128,18 +128,18 @@ type APICard struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type APITweet struct {
|
type APITweet struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
ConversationID int64 `json:"conversation_id_str,string"`
|
ConversationID int64 `json:"conversation_id_str,string"`
|
||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
FavoriteCount int `json:"favorite_count"`
|
FavoriteCount int `json:"favorite_count"`
|
||||||
FullText string `json:"full_text"`
|
FullText string `json:"full_text"`
|
||||||
DisplayTextRange []int `json:"display_text_range"`
|
DisplayTextRange []int `json:"display_text_range"`
|
||||||
Entities struct {
|
Entities struct {
|
||||||
Hashtags []struct {
|
Hashtags []struct {
|
||||||
Text string `json:"text"`
|
Text string `json:"text"`
|
||||||
} `json:"hashtags"`
|
} `json:"hashtags"`
|
||||||
Media []APIMedia `json:"media"`
|
Media []APIMedia `json:"media"`
|
||||||
URLs []struct {
|
URLs []struct {
|
||||||
ExpandedURL string `json:"expanded_url"`
|
ExpandedURL string `json:"expanded_url"`
|
||||||
ShortenedUrl string `json:"url"`
|
ShortenedUrl string `json:"url"`
|
||||||
} `json:"urls"`
|
} `json:"urls"`
|
||||||
@ -147,30 +147,30 @@ type APITweet struct {
|
|||||||
UserName string `json:"screen_name"`
|
UserName string `json:"screen_name"`
|
||||||
UserID int64 `json:"id_str,string"`
|
UserID int64 `json:"id_str,string"`
|
||||||
} `json:"user_mentions"`
|
} `json:"user_mentions"`
|
||||||
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
|
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
|
||||||
} `json:"entities"`
|
} `json:"entities"`
|
||||||
ExtendedEntities struct {
|
ExtendedEntities struct {
|
||||||
Media []APIExtendedMedia `json:"media"`
|
Media []APIExtendedMedia `json:"media"`
|
||||||
} `json:"extended_entities"`
|
} `json:"extended_entities"`
|
||||||
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
|
||||||
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
|
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
|
||||||
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
InReplyToScreenName string `json:"in_reply_to_screen_name"`
|
||||||
ReplyCount int `json:"reply_count"`
|
ReplyCount int `json:"reply_count"`
|
||||||
RetweetCount int `json:"retweet_count"`
|
RetweetCount int `json:"retweet_count"`
|
||||||
QuoteCount int `json:"quote_count"`
|
QuoteCount int `json:"quote_count"`
|
||||||
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
|
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
|
||||||
RetweetedStatusID int64
|
RetweetedStatusID int64
|
||||||
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
|
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
|
||||||
QuotedStatusID int64
|
QuotedStatusID int64
|
||||||
QuotedStatusPermalink struct {
|
QuotedStatusPermalink struct {
|
||||||
ShortURL string `json:"url"`
|
ShortURL string `json:"url"`
|
||||||
ExpandedURL string `json:"expanded"`
|
ExpandedURL string `json:"expanded"`
|
||||||
} `json:"quoted_status_permalink"`
|
} `json:"quoted_status_permalink"`
|
||||||
Time time.Time `json:"time"`
|
Time time.Time `json:"time"`
|
||||||
UserID int64 `json:"user_id_str,string"`
|
UserID int64 `json:"user_id_str,string"`
|
||||||
UserHandle string
|
UserHandle string
|
||||||
Card APICard `json:"card"`
|
Card APICard `json:"card"`
|
||||||
TombstoneText string
|
TombstoneText string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *APITweet) NormalizeContent() {
|
func (t *APITweet) NormalizeContent() {
|
||||||
@ -183,7 +183,7 @@ func (t *APITweet) NormalizeContent() {
|
|||||||
t.RetweetedStatusID = int64(id)
|
t.RetweetedStatusID = int64(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len(t.DisplayTextRange) == 2) {
|
if len(t.DisplayTextRange) == 2 {
|
||||||
t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
|
t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
|
||||||
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
|
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
|
||||||
}
|
}
|
||||||
@ -217,7 +217,6 @@ func (t APITweet) String() string {
|
|||||||
return string(data)
|
return string(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type APIUser struct {
|
type APIUser struct {
|
||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
@ -235,7 +234,7 @@ type APIUser struct {
|
|||||||
ListedCount int `json:"listed_count"`
|
ListedCount int `json:"listed_count"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Location string `json:"location"`
|
Location string `json:"location"`
|
||||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
|
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
|
||||||
ProfileBannerURL string `json:"profile_banner_url"`
|
ProfileBannerURL string `json:"profile_banner_url"`
|
||||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||||
Protected bool `json:"protected"`
|
Protected bool `json:"protected"`
|
||||||
@ -246,7 +245,6 @@ type APIUser struct {
|
|||||||
DoesntExist bool
|
DoesntExist bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type UserResponse struct {
|
type UserResponse struct {
|
||||||
Data struct {
|
Data struct {
|
||||||
User struct {
|
User struct {
|
||||||
@ -255,11 +253,12 @@ type UserResponse struct {
|
|||||||
} `json:"user"`
|
} `json:"user"`
|
||||||
} `json:"data"`
|
} `json:"data"`
|
||||||
Errors []struct {
|
Errors []struct {
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Code int `json:"code"`
|
Code int `json:"code"`
|
||||||
} `json:"errors"`
|
} `json:"errors"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u UserResponse) ConvertToAPIUser() APIUser {
|
func (u UserResponse) ConvertToAPIUser() APIUser {
|
||||||
ret := u.Data.User.Legacy
|
ret := u.Data.User.Legacy
|
||||||
ret.ID = u.Data.User.ID
|
ret.ID = u.Data.User.ID
|
||||||
@ -279,9 +278,9 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Entry struct {
|
type Entry struct {
|
||||||
EntryID string `json:"entryId"`
|
EntryID string `json:"entryId"`
|
||||||
SortIndex int64 `json:"sortIndex,string"`
|
SortIndex int64 `json:"sortIndex,string"`
|
||||||
Content struct {
|
Content struct {
|
||||||
Item struct {
|
Item struct {
|
||||||
Content struct {
|
Content struct {
|
||||||
Tombstone struct {
|
Tombstone struct {
|
||||||
@ -303,12 +302,15 @@ type Entry struct {
|
|||||||
} `json:"operation"`
|
} `json:"operation"`
|
||||||
} `json:"content"`
|
} `json:"content"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e Entry) GetTombstoneText() string {
|
func (e Entry) GetTombstoneText() string {
|
||||||
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
|
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
|
||||||
}
|
}
|
||||||
|
|
||||||
type SortableEntries []Entry
|
type SortableEntries []Entry
|
||||||
func (e SortableEntries) Len() int { return len(e) }
|
|
||||||
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
|
func (e SortableEntries) Len() int { return len(e) }
|
||||||
|
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
|
||||||
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
|
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
|
||||||
|
|
||||||
type TweetResponse struct {
|
type TweetResponse struct {
|
||||||
@ -329,15 +331,16 @@ type TweetResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var tombstone_types = map[string]string{
|
var tombstone_types = map[string]string{
|
||||||
"This Tweet was deleted by the Tweet author. Learn more": "deleted",
|
"This Tweet was deleted by the Tweet author. Learn more": "deleted",
|
||||||
"This Tweet is from a suspended account. Learn more": "suspended",
|
"This Tweet is from a suspended account. Learn more": "suspended",
|
||||||
"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden",
|
"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden",
|
||||||
"This Tweet is unavailable. Learn more": "unavailable",
|
"This Tweet is unavailable. Learn more": "unavailable",
|
||||||
"This Tweet violated the Twitter Rules. Learn more": "violated",
|
"This Tweet violated the Twitter Rules. Learn more": "violated",
|
||||||
"This Tweet is from an account that no longer exists. Learn more": "no longer exists",
|
"This Tweet is from an account that no longer exists. Learn more": "no longer exists",
|
||||||
"Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " +
|
"Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " +
|
||||||
"you’ll need to log in to Twitter. Learn more": "age-restricted",
|
"you’ll need to log in to Twitter. Learn more": "age-restricted",
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
|
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
|
||||||
* be fetched for tombstones.
|
* be fetched for tombstones.
|
||||||
@ -379,8 +382,8 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
|||||||
if entry.GetTombstoneText() != "" {
|
if entry.GetTombstoneText() != "" {
|
||||||
// Try to reconstruct the tombstone tweet
|
// Try to reconstruct the tombstone tweet
|
||||||
var tombstoned_tweet APITweet
|
var tombstoned_tweet APITweet
|
||||||
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
|
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
|
||||||
if i + 1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
|
if i+1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
|
||||||
next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID
|
next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID
|
||||||
api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)]
|
api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)]
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -390,7 +393,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
|||||||
tombstoned_tweet.UserID = api_tweet.InReplyToUserID
|
tombstoned_tweet.UserID = api_tweet.InReplyToUserID
|
||||||
ret = append(ret, UserHandle(api_tweet.InReplyToScreenName))
|
ret = append(ret, UserHandle(api_tweet.InReplyToScreenName))
|
||||||
}
|
}
|
||||||
if i - 1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
|
if i-1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
|
||||||
prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID
|
prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID
|
||||||
_, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)]
|
_, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)]
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -416,7 +419,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
|||||||
func (t *TweetResponse) GetCursor() string {
|
func (t *TweetResponse) GetCursor() string {
|
||||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||||
if len(entries) > 0 {
|
if len(entries) > 0 {
|
||||||
last_entry := entries[len(entries) - 1]
|
last_entry := entries[len(entries)-1]
|
||||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||||
return last_entry.Content.Operation.Cursor.Value
|
return last_entry.Content.Operation.Cursor.Value
|
||||||
}
|
}
|
||||||
@ -424,7 +427,7 @@ func (t *TweetResponse) GetCursor() string {
|
|||||||
|
|
||||||
// Next, try the other format ("replaceEntry")
|
// Next, try the other format ("replaceEntry")
|
||||||
instructions := t.Timeline.Instructions
|
instructions := t.Timeline.Instructions
|
||||||
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
|
last_replace_entry := instructions[len(instructions)-1].ReplaceEntry.Entry
|
||||||
if strings.Contains(last_replace_entry.EntryID, "cursor") {
|
if strings.Contains(last_replace_entry.EntryID, "cursor") {
|
||||||
return last_replace_entry.Content.Operation.Cursor.Value
|
return last_replace_entry.Content.Operation.Cursor.Value
|
||||||
}
|
}
|
||||||
@ -450,7 +453,6 @@ func (t *TweetResponse) IsEndOfFeed() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func idstr_to_int(idstr string) int64 {
|
func idstr_to_int(idstr string) int64 {
|
||||||
id, err := strconv.Atoi(idstr)
|
id, err := strconv.Atoi(idstr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
|
||||||
"os"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@ -11,17 +11,16 @@ import (
|
|||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
func TestNormalizeContent(t *testing.T) {
|
func TestNormalizeContent(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
test_cases := []struct {
|
test_cases := []struct {
|
||||||
filename string
|
filename string
|
||||||
eventual_full_text string
|
eventual_full_text string
|
||||||
quoted_status_id TweetID
|
quoted_status_id TweetID
|
||||||
in_reply_to_id TweetID
|
in_reply_to_id TweetID
|
||||||
retweeted_status_id TweetID
|
retweeted_status_id TweetID
|
||||||
reply_mentions string
|
reply_mentions string
|
||||||
} {
|
}{
|
||||||
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
|
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
|
||||||
{"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""},
|
{"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""},
|
||||||
{"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"},
|
{"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"},
|
||||||
@ -48,7 +47,7 @@ func TestNormalizeContent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
var tweet APITweet
|
var tweet APITweet
|
||||||
err = json.Unmarshal(data, &tweet)
|
err = json.Unmarshal(data, &tweet)
|
||||||
assert.NoError(err, "Failed at " + v.filename)
|
assert.NoError(err, "Failed at "+v.filename)
|
||||||
|
|
||||||
tweet.NormalizeContent()
|
tweet.NormalizeContent()
|
||||||
|
|
||||||
@ -60,7 +59,6 @@ func TestNormalizeContent(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestUserProfileToAPIUser(t *testing.T) {
|
func TestUserProfileToAPIUser(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
|
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
|
||||||
@ -76,7 +74,6 @@ func TestUserProfileToAPIUser(t *testing.T) {
|
|||||||
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
|
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestGetCursor(t *testing.T) {
|
func TestGetCursor(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
|
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
|
||||||
@ -91,13 +88,12 @@ func TestGetCursor(t *testing.T) {
|
|||||||
tweet_resp.GetCursor())
|
tweet_resp.GetCursor())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestIsEndOfFeed(t *testing.T) {
|
func TestIsEndOfFeed(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
test_cases := []struct {
|
test_cases := []struct {
|
||||||
filename string
|
filename string
|
||||||
is_end_of_feed bool
|
is_end_of_feed bool
|
||||||
} {
|
}{
|
||||||
{"test_responses/michael_malice_feed.json", false},
|
{"test_responses/michael_malice_feed.json", false},
|
||||||
{"test_responses/kwiber_end_of_feed.json", true},
|
{"test_responses/kwiber_end_of_feed.json", true},
|
||||||
}
|
}
|
||||||
@ -113,7 +109,6 @@ func TestIsEndOfFeed(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestHandleTombstonesHidden(t *testing.T) {
|
func TestHandleTombstonesHidden(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
|
data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
|
||||||
|
@ -13,13 +13,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type CardValue struct {
|
type CardValue struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
StringValue string `json:"string_value"`
|
StringValue string `json:"string_value"`
|
||||||
ImageValue struct {
|
ImageValue struct {
|
||||||
AltText string `json:"alt"`
|
AltText string `json:"alt"`
|
||||||
Height int `json:"height"`
|
Height int `json:"height"`
|
||||||
Width int `json:"width"`
|
Width int `json:"width"`
|
||||||
Url string `json:"url"`
|
Url string `json:"url"`
|
||||||
} `json:"image_value"`
|
} `json:"image_value"`
|
||||||
UserValue struct {
|
UserValue struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
@ -30,13 +30,14 @@ type CardValue struct {
|
|||||||
type APIV2Card struct {
|
type APIV2Card struct {
|
||||||
Legacy struct {
|
Legacy struct {
|
||||||
BindingValues []struct {
|
BindingValues []struct {
|
||||||
Key string `json:"key"`
|
Key string `json:"key"`
|
||||||
Value CardValue `json:"value"`
|
Value CardValue `json:"value"`
|
||||||
} `json:"binding_values"`
|
} `json:"binding_values"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Url string `json:"url"`
|
Url string `json:"url"`
|
||||||
} `json:"legacy"`
|
} `json:"legacy"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (card APIV2Card) ParseAsUrl() Url {
|
func (card APIV2Card) ParseAsUrl() Url {
|
||||||
values := make(map[string]CardValue)
|
values := make(map[string]CardValue)
|
||||||
for _, obj := range card.Legacy.BindingValues {
|
for _, obj := range card.Legacy.BindingValues {
|
||||||
@ -121,6 +122,7 @@ type APIV2UserResult struct {
|
|||||||
} `json:"result"`
|
} `json:"result"`
|
||||||
} `json:"user_results"`
|
} `json:"user_results"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u APIV2UserResult) ToUser() User {
|
func (u APIV2UserResult) ToUser() User {
|
||||||
user, err := ParseSingleUser(u.UserResults.Result.Legacy)
|
user, err := ParseSingleUser(u.UserResults.Result.Legacy)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -131,16 +133,16 @@ func (u APIV2UserResult) ToUser() User {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type _Result struct {
|
type _Result struct {
|
||||||
ID int64 `json:"rest_id,string"`
|
ID int64 `json:"rest_id,string"`
|
||||||
Legacy APIV2Tweet `json:"legacy"`
|
Legacy APIV2Tweet `json:"legacy"`
|
||||||
Tombstone *struct {
|
Tombstone *struct {
|
||||||
Text struct {
|
Text struct {
|
||||||
Text string `json:"text"`
|
Text string `json:"text"`
|
||||||
} `json:"text"`
|
} `json:"text"`
|
||||||
} `json:"tombstone"`
|
} `json:"tombstone"`
|
||||||
Core *APIV2UserResult `json:"core"`
|
Core *APIV2UserResult `json:"core"`
|
||||||
Card APIV2Card `json:"card"`
|
Card APIV2Card `json:"card"`
|
||||||
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
|
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIV2Result struct {
|
type APIV2Result struct {
|
||||||
@ -149,11 +151,12 @@ type APIV2Result struct {
|
|||||||
Tweet _Result `json:"tweet"`
|
Tweet _Result `json:"tweet"`
|
||||||
} `json:"result"`
|
} `json:"result"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
// Start by checking if this is a null entry in a feed
|
// Start by checking if this is a null entry in a feed
|
||||||
if api_result.Result.Tombstone != nil && ignore_null_entries{
|
if api_result.Result.Tombstone != nil && ignore_null_entries {
|
||||||
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
@ -221,7 +224,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
found = true
|
found = true
|
||||||
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
|
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
|
||||||
main_tweet.Urls[i] = url
|
main_tweet.Urls[i] = url
|
||||||
}
|
}
|
||||||
if !found {
|
if !found {
|
||||||
@ -245,6 +248,7 @@ type APIV2Tweet struct {
|
|||||||
RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"`
|
RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"`
|
||||||
APITweet
|
APITweet
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
@ -253,7 +257,6 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
|||||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
||||||
ret.MergeWith(orig_tweet_trove)
|
ret.MergeWith(orig_tweet_trove)
|
||||||
|
|
||||||
|
|
||||||
retweet := Retweet{}
|
retweet := Retweet{}
|
||||||
var err error
|
var err error
|
||||||
retweet.RetweetID = TweetID(api_v2_tweet.ID)
|
retweet.RetweetID = TweetID(api_v2_tweet.ID)
|
||||||
@ -277,25 +280,24 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type APIV2Entry struct {
|
type APIV2Entry struct {
|
||||||
EntryID string `json:"entryId"`
|
EntryID string `json:"entryId"`
|
||||||
SortIndex int64 `json:"sortIndex,string"`
|
SortIndex int64 `json:"sortIndex,string"`
|
||||||
Content struct {
|
Content struct {
|
||||||
ItemContent struct {
|
ItemContent struct {
|
||||||
EntryType string `json:"entryType"`
|
EntryType string `json:"entryType"`
|
||||||
TweetResults APIV2Result `json:"tweet_results"`
|
TweetResults APIV2Result `json:"tweet_results"`
|
||||||
} `json:"itemContent"`
|
} `json:"itemContent"`
|
||||||
|
|
||||||
// Cursors
|
// Cursors
|
||||||
EntryType string `json:"entryType"`
|
EntryType string `json:"entryType"`
|
||||||
Value string `json:"value"`
|
Value string `json:"value"`
|
||||||
CursorType string `json:"cursorType"`
|
CursorType string `json:"cursorType"`
|
||||||
|
|
||||||
} `json:"content"`
|
} `json:"content"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIV2Instruction struct {
|
type APIV2Instruction struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Entries []APIV2Entry`json:"entries"`
|
Entries []APIV2Entry `json:"entries"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIV2Response struct {
|
type APIV2Response struct {
|
||||||
@ -324,7 +326,7 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
|
|||||||
|
|
||||||
func (api_response APIV2Response) GetCursorBottom() string {
|
func (api_response APIV2Response) GetCursorBottom() string {
|
||||||
entries := api_response.GetMainInstruction().Entries
|
entries := api_response.GetMainInstruction().Entries
|
||||||
last_entry := entries[len(entries) - 1]
|
last_entry := entries[len(entries)-1]
|
||||||
if last_entry.Content.CursorType != "Bottom" {
|
if last_entry.Content.CursorType != "Bottom" {
|
||||||
panic("No bottom cursor found")
|
panic("No bottom cursor found")
|
||||||
}
|
}
|
||||||
@ -349,7 +351,7 @@ func (api_response APIV2Response) IsEmpty() bool {
|
|||||||
*/
|
*/
|
||||||
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
||||||
if !strings.HasPrefix(entry.EntryID, "tweet-") {
|
if !strings.HasPrefix(entry.EntryID, "tweet-") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -363,12 +365,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
|||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
||||||
if cursor != "" {
|
if cursor != "" {
|
||||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||||
}
|
}
|
||||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -446,7 +447,7 @@ func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Respo
|
|||||||
}
|
}
|
||||||
if fresh_response.IsEmpty() {
|
if fresh_response.IsEmpty() {
|
||||||
// Response has a pinned tweet, but no other content: end of feed has been reached
|
// Response has a pinned tweet, but no other content: end of feed has been reached
|
||||||
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
|
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
|
||||||
}
|
}
|
||||||
|
|
||||||
last_response = &fresh_response
|
last_response = &fresh_response
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
|
||||||
"os"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
@ -34,7 +34,7 @@ func TestAPIV2ParseUser(t *testing.T) {
|
|||||||
assert.Equal(user.ID, UserID(44067298))
|
assert.Equal(user.ID, UserID(44067298))
|
||||||
assert.Equal(user.DisplayName, "Michael Malice")
|
assert.Equal(user.DisplayName, "Michael Malice")
|
||||||
assert.Equal(user.Handle, UserHandle("michaelmalice"))
|
assert.Equal(user.Handle, UserHandle("michaelmalice"))
|
||||||
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & " +
|
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & "+
|
||||||
"Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model")
|
"Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model")
|
||||||
assert.Equal(user.FollowingCount, 964)
|
assert.Equal(user.FollowingCount, 964)
|
||||||
assert.Equal(user.FollowersCount, 334571)
|
assert.Equal(user.FollowersCount, 334571)
|
||||||
@ -70,7 +70,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
|
|||||||
assert.True(ok)
|
assert.True(ok)
|
||||||
assert.Equal(tweet.ID, TweetID(1485708879174508550))
|
assert.Equal(tweet.ID, TweetID(1485708879174508550))
|
||||||
assert.Equal(tweet.UserID, UserID(44067298))
|
assert.Equal(tweet.UserID, UserID(44067298))
|
||||||
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a " +
|
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a "+
|
||||||
"row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964")
|
"row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964")
|
||||||
assert.Equal(tweet.PostedAt.Unix(), int64(1643055574))
|
assert.Equal(tweet.PostedAt.Unix(), int64(1643055574))
|
||||||
assert.Equal(tweet.QuotedTweetID, TweetID(0))
|
assert.Equal(tweet.QuotedTweetID, TweetID(0))
|
||||||
@ -133,7 +133,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
|
|||||||
assert.True(ok)
|
assert.True(ok)
|
||||||
assert.Equal(TweetID(1485690410899021826), quote_tweet.ID)
|
assert.Equal(TweetID(1485690410899021826), quote_tweet.ID)
|
||||||
assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID)
|
assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID)
|
||||||
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, " +
|
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, "+
|
||||||
"for example", quote_tweet.Text)
|
"for example", quote_tweet.Text)
|
||||||
|
|
||||||
// Should be 2 users: quoter and quoted
|
// Should be 2 users: quoter and quoted
|
||||||
@ -182,7 +182,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
|||||||
// Check the video
|
// Check the video
|
||||||
v := tweet.Videos[0]
|
v := tweet.Videos[0]
|
||||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl)
|
||||||
assert.Equal(0, v.ViewCount) // TODO: make this work
|
assert.Equal(0, v.ViewCount) // TODO: make this work
|
||||||
assert.Equal(720, v.Height)
|
assert.Equal(720, v.Height)
|
||||||
assert.Equal(720, v.Width)
|
assert.Equal(720, v.Width)
|
||||||
assert.Equal(30066, v.Duration)
|
assert.Equal(30066, v.Duration)
|
||||||
@ -200,7 +200,6 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
|||||||
assert.Equal(UserID(44067298), retweeting_user.ID)
|
assert.Equal(UserID(44067298), retweeting_user.ID)
|
||||||
assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle)
|
assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle)
|
||||||
|
|
||||||
|
|
||||||
// Should be 1 retweet
|
// Should be 1 retweet
|
||||||
assert.Equal(1, len(trove.Retweets))
|
assert.Equal(1, len(trove.Retweets))
|
||||||
retweet, ok := trove.Retweets[1485699748514476037]
|
retweet, ok := trove.Retweets[1485699748514476037]
|
||||||
@ -270,7 +269,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
|
|||||||
assert.Equal(UserID(599817378), retweet.RetweetedByID)
|
assert.Equal(UserID(599817378), retweet.RetweetedByID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse tweet with quoted tombstone
|
* Parse tweet with quoted tombstone
|
||||||
*/
|
*/
|
||||||
@ -300,13 +298,12 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
|
|||||||
assert.True(ok)
|
assert.True(ok)
|
||||||
assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID)
|
assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID)
|
||||||
assert.Equal("no longer exists", tombstoned_tweet.TombstoneType)
|
assert.Equal("no longer exists", tombstoned_tweet.TombstoneType)
|
||||||
assert.True (tombstoned_tweet.IsStub)
|
assert.True(tombstoned_tweet.IsStub)
|
||||||
assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle)
|
assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle)
|
||||||
|
|
||||||
assert.Equal(0, len(trove.Retweets))
|
assert.Equal(0, len(trove.Retweets))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a tweet with a link
|
* Parse a tweet with a link
|
||||||
*/
|
*/
|
||||||
@ -326,7 +323,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
|||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485695695025803264]
|
tweet, ok := trove.Tweets[1485695695025803264]
|
||||||
assert.True(ok)
|
assert.True(ok)
|
||||||
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to " +
|
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to "+
|
||||||
"show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)
|
"show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)
|
||||||
|
|
||||||
assert.Equal(1, len(tweet.Urls))
|
assert.Equal(1, len(tweet.Urls))
|
||||||
@ -335,7 +332,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
|||||||
assert.Equal("observer.com", url.Domain)
|
assert.Equal("observer.com", url.Domain)
|
||||||
assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
|
assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
|
||||||
assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
|
assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
|
||||||
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to " +
|
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. It’s a way to demonstrate to "+
|
||||||
"others that one is a good person without having to do anything", url.Description)
|
"others that one is a good person without having to do anything", url.Description)
|
||||||
assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||||
assert.Equal(600, url.ThumbnailWidth)
|
assert.Equal(600, url.ThumbnailWidth)
|
||||||
@ -439,10 +436,9 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
|
|||||||
|
|
||||||
assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix())
|
assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix())
|
||||||
assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix())
|
assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix())
|
||||||
assert.Equal(1440 * 60, poll.VotingDuration)
|
assert.Equal(1440*60, poll.VotingDuration)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseAPIV2UserFeed(t *testing.T) {
|
func TestParseAPIV2UserFeed(t *testing.T) {
|
||||||
data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
|
data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -495,7 +491,6 @@ func TestParseAPIV2UserFeed(t *testing.T) {
|
|||||||
fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets))
|
fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should correctly identify an "empty" response
|
* Should correctly identify an "empty" response
|
||||||
*/
|
*/
|
||||||
@ -562,13 +557,12 @@ func TestAPIV2TombstoneEntry(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
||||||
assert.Len(trove.Tweets, 0)
|
assert.Len(trove.Tweets, 0)
|
||||||
assert.Len(trove.Users, 0)
|
assert.Len(trove.Users, 0)
|
||||||
assert.Len(trove.Retweets, 0)
|
assert.Len(trove.Retweets, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestTweetWithWarning(t *testing.T) {
|
func TestTweetWithWarning(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json")
|
||||||
|
@ -1,29 +1,29 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"path"
|
"path"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ImageID is the numeric identifier of an Image.
type ImageID int64
||||||
|
|
||||||
type Image struct {
|
type Image struct {
|
||||||
ID ImageID
|
ID ImageID
|
||||||
TweetID TweetID
|
TweetID TweetID
|
||||||
Width int
|
Width int
|
||||||
Height int
|
Height int
|
||||||
RemoteURL string
|
RemoteURL string
|
||||||
LocalFilename string
|
LocalFilename string
|
||||||
IsDownloaded bool
|
IsDownloaded bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIMedia(apiMedia APIMedia) Image {
|
func ParseAPIMedia(apiMedia APIMedia) Image {
|
||||||
local_filename := path.Base(apiMedia.MediaURLHttps)
|
local_filename := path.Base(apiMedia.MediaURLHttps)
|
||||||
return Image{
|
return Image{
|
||||||
ID: ImageID(apiMedia.ID),
|
ID: ImageID(apiMedia.ID),
|
||||||
RemoteURL: apiMedia.MediaURLHttps,
|
RemoteURL: apiMedia.MediaURLHttps,
|
||||||
Width: apiMedia.OriginalInfo.Width,
|
Width: apiMedia.OriginalInfo.Width,
|
||||||
Height: apiMedia.OriginalInfo.Height,
|
Height: apiMedia.OriginalInfo.Height,
|
||||||
LocalFilename: local_filename,
|
LocalFilename: local_filename,
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,31 +1,31 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"encoding/json"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseAPIMedia(t *testing.T) {
|
func TestParseAPIMedia(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
data, err := os.ReadFile("test_responses/tweet_content/image.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apimedia APIMedia
|
var apimedia APIMedia
|
||||||
err = json.Unmarshal(data, &apimedia)
|
err = json.Unmarshal(data, &apimedia)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
image := ParseAPIMedia(apimedia)
|
image := ParseAPIMedia(apimedia)
|
||||||
assert.Equal(ImageID(1395882862289772553), image.ID)
|
assert.Equal(ImageID(1395882862289772553), image.ID)
|
||||||
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
|
||||||
assert.Equal(593, image.Width)
|
assert.Equal(593, image.Width)
|
||||||
assert.Equal(239, image.Height)
|
assert.Equal(239, image.Height)
|
||||||
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
|
||||||
assert.False(image.IsDownloaded)
|
assert.False(image.IsDownloaded)
|
||||||
}
|
}
|
||||||
|
@ -2,11 +2,10 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the expanded version of a short URL. Input must be a real short URL.
|
* Return the expanded version of a short URL. Input must be a real short URL.
|
||||||
*/
|
*/
|
||||||
@ -21,7 +20,7 @@ func ExpandShortUrl(short_url string) string {
|
|||||||
|
|
||||||
resp, err := client.Get(short_url)
|
resp, err := client.Get(short_url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err) // TODO: handle timeouts
|
panic(err) // TODO: handle timeouts
|
||||||
}
|
}
|
||||||
if resp.StatusCode != 301 {
|
if resp.StatusCode != 301 {
|
||||||
panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR))
|
panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR))
|
||||||
|
@ -6,12 +6,11 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
func TestExpandShortUrl(t *testing.T) {
|
func TestExpandShortUrl(t *testing.T) {
|
||||||
redirecting_to := "redirect target"
|
redirecting_to := "redirect target"
|
||||||
srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
|
srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
|
||||||
|
112
scraper/poll.go
112
scraper/poll.go
@ -1,82 +1,82 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strings"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"net/url"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// PollID is the numeric identifier of a Poll.
type PollID int64
||||||
|
|
||||||
type Poll struct {
|
type Poll struct {
|
||||||
ID PollID
|
ID PollID
|
||||||
TweetID TweetID
|
TweetID TweetID
|
||||||
NumChoices int
|
NumChoices int
|
||||||
|
|
||||||
Choice1 string
|
Choice1 string
|
||||||
Choice1_Votes int
|
Choice1_Votes int
|
||||||
Choice2 string
|
Choice2 string
|
||||||
Choice2_Votes int
|
Choice2_Votes int
|
||||||
Choice3 string
|
Choice3 string
|
||||||
Choice3_Votes int
|
Choice3_Votes int
|
||||||
Choice4 string
|
Choice4 string
|
||||||
Choice4_Votes int
|
Choice4_Votes int
|
||||||
|
|
||||||
VotingDuration int // In seconds
|
VotingDuration int // In seconds
|
||||||
VotingEndsAt Timestamp
|
VotingEndsAt Timestamp
|
||||||
|
|
||||||
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIPoll(apiCard APICard) Poll {
|
func ParseAPIPoll(apiCard APICard) Poll {
|
||||||
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
id := int_or_panic(card_url.Hostname())
|
id := int_or_panic(card_url.Hostname())
|
||||||
|
|
||||||
ret := Poll{}
|
ret := Poll{}
|
||||||
ret.ID = PollID(id)
|
ret.ID = PollID(id)
|
||||||
ret.NumChoices = parse_num_choices(apiCard.Name)
|
ret.NumChoices = parse_num_choices(apiCard.Name)
|
||||||
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
||||||
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
||||||
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
||||||
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
||||||
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
||||||
|
|
||||||
if ret.NumChoices > 2 {
|
if ret.NumChoices > 2 {
|
||||||
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
||||||
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
||||||
}
|
}
|
||||||
if ret.NumChoices > 3 {
|
if ret.NumChoices > 3 {
|
||||||
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
||||||
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func parse_num_choices(card_name string) int {
|
func parse_num_choices(card_name string) int {
|
||||||
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
||||||
panic("Not valid card name: " + card_name)
|
panic("Not valid card name: " + card_name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return int_or_panic(card_name[4:5])
|
return int_or_panic(card_name[4:5])
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Convert a base-10 string to an int, panicking with the `strconv.Atoi` error if it is not a
 * valid integer.
 */
func int_or_panic(s string) int {
	value, parse_err := strconv.Atoi(s)
	if parse_err == nil {
		return value
	}
	panic(parse_err)
}
|
||||||
|
@ -1,67 +1,67 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"encoding/json"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParsePoll2Choices(t *testing.T) {
|
func TestParsePoll2Choices(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
poll := ParseAPIPoll(apiCard)
|
poll := ParseAPIPoll(apiCard)
|
||||||
assert.Equal(PollID(1457419248461131776), poll.ID)
|
assert.Equal(PollID(1457419248461131776), poll.ID)
|
||||||
assert.Equal(2, poll.NumChoices)
|
assert.Equal(2, poll.NumChoices)
|
||||||
assert.Equal(60 * 60 * 24, poll.VotingDuration)
|
assert.Equal(60*60*24, poll.VotingDuration)
|
||||||
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
|
||||||
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
|
||||||
|
|
||||||
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||||
assert.Equal("Yes", poll.Choice1)
|
assert.Equal("Yes", poll.Choice1)
|
||||||
assert.Equal("No", poll.Choice2)
|
assert.Equal("No", poll.Choice2)
|
||||||
assert.Equal(529, poll.Choice1_Votes)
|
assert.Equal(529, poll.Choice1_Votes)
|
||||||
assert.Equal(2182, poll.Choice2_Votes)
|
assert.Equal(2182, poll.Choice2_Votes)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParsePoll4Choices(t *testing.T) {
|
func TestParsePoll4Choices(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
poll := ParseAPIPoll(apiCard)
|
poll := ParseAPIPoll(apiCard)
|
||||||
assert.Equal(PollID(1455611588854140929), poll.ID)
|
assert.Equal(PollID(1455611588854140929), poll.ID)
|
||||||
assert.Equal(4, poll.NumChoices)
|
assert.Equal(4, poll.NumChoices)
|
||||||
assert.Equal(60 * 60 * 24, poll.VotingDuration)
|
assert.Equal(60*60*24, poll.VotingDuration)
|
||||||
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
|
||||||
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
|
||||||
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
|
||||||
|
|
||||||
assert.Equal("Alec Baldwin", poll.Choice1)
|
assert.Equal("Alec Baldwin", poll.Choice1)
|
||||||
assert.Equal(1669, poll.Choice1_Votes)
|
assert.Equal(1669, poll.Choice1_Votes)
|
||||||
|
|
||||||
assert.Equal("Andew Cuomo", poll.Choice2)
|
assert.Equal("Andew Cuomo", poll.Choice2)
|
||||||
assert.Equal(272, poll.Choice2_Votes)
|
assert.Equal(272, poll.Choice2_Votes)
|
||||||
|
|
||||||
assert.Equal("George Floyd", poll.Choice3)
|
assert.Equal("George Floyd", poll.Choice3)
|
||||||
assert.Equal(829, poll.Choice3_Votes)
|
assert.Equal(829, poll.Choice3_Votes)
|
||||||
|
|
||||||
assert.Equal("Derek Chauvin", poll.Choice4)
|
assert.Equal("Derek Chauvin", poll.Choice4)
|
||||||
assert.Equal(2397, poll.Choice4_Votes)
|
assert.Equal(2397, poll.Choice4_Votes)
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
// Retweet groups the identifiers, optionally-loaded objects, and timestamp
// belonging to one retweet.
// NOTE(review): the per-field meanings below are inferred from the field
// names only -- confirm against ParseSingleRetweet.
type Retweet struct {
	RetweetID     TweetID
	TweetID       TweetID // presumably the ID of the tweet that was retweeted
	Tweet         *Tweet
	RetweetedByID UserID `db:"retweeted_by"` // stored under the "retweeted_by" column name
	RetweetedBy   *User
	RetweetedAt   Timestamp
}
|
||||||
|
|
||||||
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
|
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
|
||||||
|
@ -5,8 +5,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
@ -5,7 +5,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// TimestampToDateString is not implemented yet: it unconditionally panics
// with "???" and never returns, regardless of the timestamp passed in.
func TimestampToDateString(timestamp int) string {
	panic("???") // TODO
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"time"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"offline_twitter/terminal_utils"
|
"offline_twitter/terminal_utils"
|
||||||
)
|
)
|
||||||
@ -13,18 +13,18 @@ const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50
|
|||||||
type TweetID int64
|
type TweetID int64
|
||||||
|
|
||||||
type Tweet struct {
|
type Tweet struct {
|
||||||
ID TweetID
|
ID TweetID
|
||||||
UserID UserID
|
UserID UserID
|
||||||
UserHandle UserHandle // For processing tombstones
|
UserHandle UserHandle // For processing tombstones
|
||||||
User *User
|
User *User
|
||||||
Text string
|
Text string
|
||||||
PostedAt Timestamp
|
PostedAt Timestamp
|
||||||
NumLikes int
|
NumLikes int
|
||||||
NumRetweets int
|
NumRetweets int
|
||||||
NumReplies int
|
NumReplies int
|
||||||
NumQuoteTweets int
|
NumQuoteTweets int
|
||||||
InReplyToID TweetID
|
InReplyToID TweetID
|
||||||
QuotedTweetID TweetID
|
QuotedTweetID TweetID
|
||||||
|
|
||||||
Images []Image
|
Images []Image
|
||||||
Videos []Video
|
Videos []Video
|
||||||
@ -35,14 +35,13 @@ type Tweet struct {
|
|||||||
Polls []Poll
|
Polls []Poll
|
||||||
|
|
||||||
TombstoneType string
|
TombstoneType string
|
||||||
IsStub bool
|
IsStub bool
|
||||||
|
|
||||||
IsContentDownloaded bool
|
IsContentDownloaded bool
|
||||||
IsConversationScraped bool
|
IsConversationScraped bool
|
||||||
LastScrapedAt Timestamp
|
LastScrapedAt Timestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func (t Tweet) String() string {
|
func (t Tweet) String() string {
|
||||||
var author string
|
var author string
|
||||||
if t.User != nil {
|
if t.User != nil {
|
||||||
@ -52,7 +51,7 @@ func (t Tweet) String() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ret := fmt.Sprintf(
|
ret := fmt.Sprintf(
|
||||||
`%s
|
`%s
|
||||||
%s
|
%s
|
||||||
%s
|
%s
|
||||||
Replies: %d RT: %d QT: %d Likes: %d
|
Replies: %d RT: %d QT: %d Likes: %d
|
||||||
@ -67,11 +66,11 @@ Replies: %d RT: %d QT: %d Likes: %d
|
|||||||
)
|
)
|
||||||
|
|
||||||
if len(t.Images) > 0 {
|
if len(t.Images) > 0 {
|
||||||
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN + "images: %d\n" + terminal_utils.COLOR_RESET, len(t.Images))
|
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
|
||||||
}
|
}
|
||||||
if len(t.Urls) > 0 {
|
if len(t.Urls) > 0 {
|
||||||
ret += "urls: [\n"
|
ret += "urls: [\n"
|
||||||
for _, url := range(t.Urls) {
|
for _, url := range t.Urls {
|
||||||
ret += " " + url.Text + "\n"
|
ret += " " + url.Text + "\n"
|
||||||
}
|
}
|
||||||
ret += "]"
|
ret += "]"
|
||||||
@ -90,7 +89,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
ret.Text = apiTweet.FullText
|
ret.Text = apiTweet.FullText
|
||||||
|
|
||||||
// Process "posted-at" date and time
|
// Process "posted-at" date and time
|
||||||
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
|
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
|
||||||
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
||||||
@ -125,7 +124,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
|
|
||||||
// Process images
|
// Process images
|
||||||
for _, media := range apiTweet.Entities.Media {
|
for _, media := range apiTweet.Entities.Media {
|
||||||
if media.Type != "photo" { // TODO: remove this eventually
|
if media.Type != "photo" { // TODO: remove this eventually
|
||||||
panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR))
|
panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR))
|
||||||
}
|
}
|
||||||
new_image := ParseAPIMedia(media)
|
new_image := ParseAPIMedia(media)
|
||||||
@ -151,7 +150,6 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Process videos
|
// Process videos
|
||||||
for _, entity := range apiTweet.ExtendedEntities.Media {
|
for _, entity := range apiTweet.ExtendedEntities.Media {
|
||||||
if entity.Type != "video" && entity.Type != "animated_gif" {
|
if entity.Type != "video" && entity.Type != "animated_gif" {
|
||||||
@ -175,13 +173,12 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
// Process tombstones and other metadata
|
// Process tombstones and other metadata
|
||||||
ret.TombstoneType = apiTweet.TombstoneText
|
ret.TombstoneType = apiTweet.TombstoneText
|
||||||
ret.IsStub = !(ret.TombstoneType == "")
|
ret.IsStub = !(ret.TombstoneType == "")
|
||||||
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
||||||
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a single tweet with no replies from the API.
|
* Get a single tweet with no replies from the API.
|
||||||
*
|
*
|
||||||
@ -206,7 +203,6 @@ func GetTweet(id TweetID) (Tweet, error) {
|
|||||||
return ParseSingleTweet(single_tweet)
|
return ParseSingleTweet(single_tweet)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a list of tweets, including the original and the rest of its thread,
|
* Return a list of tweets, including the original and the rest of its thread,
|
||||||
* along with a list of associated users.
|
* along with a list of associated users.
|
||||||
@ -227,7 +223,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
|
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
|
||||||
tweet_response.GetCursor() != "" {
|
tweet_response.GetCursor() != "" {
|
||||||
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
|
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
|
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
|
||||||
|
@ -5,13 +5,13 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func load_tweet_from_file(filename string) Tweet{
|
func load_tweet_from_file(filename string) Tweet {
|
||||||
data, err := os.ReadFile(filename)
|
data, err := os.ReadFile(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@ -28,12 +28,11 @@ func load_tweet_from_file(filename string) Tweet{
|
|||||||
return tweet
|
return tweet
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseSingleTweet(t *testing.T) {
|
func TestParseSingleTweet(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
|
||||||
|
|
||||||
assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the " +
|
assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the "+
|
||||||
"largest white pill I’ve swallowed in years.", tweet.Text)
|
"largest white pill I’ve swallowed in years.", tweet.Text)
|
||||||
assert.Len(tweet.Mentions, 1)
|
assert.Len(tweet.Mentions, 1)
|
||||||
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
|
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
|
||||||
@ -73,7 +72,7 @@ func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
|
|||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
|
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
|
||||||
|
|
||||||
assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the " +
|
assert.Equal("This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the "+
|
||||||
"weasels and rats running American industry today?", tweet.Text)
|
"weasels and rats running American industry today?", tweet.Text)
|
||||||
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
|
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
|
||||||
|
|
||||||
@ -135,7 +134,7 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
|
|||||||
|
|
||||||
assert.False(tweet.Urls[0].HasCard)
|
assert.False(tweet.Urls[0].HasCard)
|
||||||
assert.False(tweet.Urls[1].HasCard)
|
assert.False(tweet.Urls[1].HasCard)
|
||||||
assert.True (tweet.Urls[2].HasCard)
|
assert.True(tweet.Urls[2].HasCard)
|
||||||
|
|
||||||
assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title)
|
assert.Equal("Biden’s victory came from the suburbs", tweet.Urls[2].Title)
|
||||||
}
|
}
|
||||||
@ -166,12 +165,11 @@ func TestTweetWithPoll(t *testing.T) {
|
|||||||
assert.Equal(624, p.Choice2_Votes)
|
assert.Equal(624, p.Choice2_Votes)
|
||||||
assert.Equal(778, p.Choice3_Votes)
|
assert.Equal(778, p.Choice3_Votes)
|
||||||
assert.Equal(1138, p.Choice4_Votes)
|
assert.Equal(1138, p.Choice4_Votes)
|
||||||
assert.Equal(1440 * 60, p.VotingDuration)
|
assert.Equal(1440*60, p.VotingDuration)
|
||||||
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
|
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
|
||||||
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
|
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseTweetResponse(t *testing.T) {
|
func TestParseTweetResponse(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
|
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
|
||||||
@ -186,7 +184,7 @@ func TestParseTweetResponse(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
tweets, retweets, users := trove.Transform()
|
tweets, retweets, users := trove.Transform()
|
||||||
|
|
||||||
assert.Len(tweets, 29 - 3)
|
assert.Len(tweets, 29-3)
|
||||||
assert.Len(retweets, 3)
|
assert.Len(retweets, 3)
|
||||||
assert.Len(users, 9)
|
assert.Len(users, 9)
|
||||||
}
|
}
|
||||||
|
@ -8,9 +8,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// TweetTrove bundles tweets, users and retweets in maps keyed by their IDs,
// together with a list of user handles collected for tombstones.
type TweetTrove struct {
	Tweets   map[TweetID]Tweet
	Users    map[UserID]User
	Retweets map[TweetID]Retweet

	// NOTE(review): presumably handles of users seen only via tombstoned
	// tweets -- confirm against the code that fills this slice.
	TombstoneUsers []UserHandle
}
|
||||||
@ -38,7 +38,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [
|
|||||||
retweets = append(retweets, val)
|
retweets = append(retweets, val)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
} // TODO: refactor until this function isn't needed anymore
|
} // TODO: refactor until this function isn't needed anymore
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for a user by handle. Second param is whether the user was found or not.
|
* Search for a user by handle. Second param is whether the user was found or not.
|
||||||
|
@ -2,28 +2,28 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
"net/url"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Url holds a link attached to a tweet, along with the metadata of its
// preview card (title, description, thumbnail) when it has one.
type Url struct {
	TweetID TweetID

	Domain             string
	Text               string
	ShortText          string
	Title              string
	Description        string
	ThumbnailWidth     int
	ThumbnailHeight    int
	ThumbnailRemoteUrl string
	ThumbnailLocalPath string
	CreatorID          UserID
	SiteID             UserID

	// Status flags
	HasCard             bool
	HasThumbnail        bool
	IsContentDownloaded bool
}
|
||||||
|
|
||||||
@ -86,7 +86,7 @@ func TryParseTweetUrl(url string) (UserHandle, TweetID, bool) {
|
|||||||
if matches == nil {
|
if matches == nil {
|
||||||
return UserHandle(""), TweetID(0), false
|
return UserHandle(""), TweetID(0), false
|
||||||
}
|
}
|
||||||
if len(matches) != 3 { // matches[0] is the full string
|
if len(matches) != 3 { // matches[0] is the full string
|
||||||
panic(matches)
|
panic(matches)
|
||||||
}
|
}
|
||||||
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
|
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
|
||||||
|
@ -1,153 +1,153 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"encoding/json"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseAPIUrlCard(t *testing.T) {
|
func TestParseAPIUrlCard(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
url := ParseAPIUrlCard(apiCard)
|
||||||
assert.Equal("reason.com", url.Domain)
|
assert.Equal("reason.com", url.Domain)
|
||||||
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
|
||||||
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned " +
|
assert.Equal("\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
|
||||||
"resilience.\"", url.Description)
|
"resilience.\"", url.Description)
|
||||||
assert.Equal(600, url.ThumbnailWidth)
|
assert.Equal(600, url.ThumbnailWidth)
|
||||||
assert.Equal(315, url.ThumbnailHeight)
|
assert.Equal(315, url.ThumbnailHeight)
|
||||||
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
|
||||||
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
|
||||||
assert.Equal(UserID(155581583), url.CreatorID)
|
assert.Equal(UserID(155581583), url.CreatorID)
|
||||||
assert.Equal(UserID(16467567), url.SiteID)
|
assert.Equal(UserID(16467567), url.SiteID)
|
||||||
assert.True(url.HasThumbnail)
|
assert.True(url.HasThumbnail)
|
||||||
assert.False(url.IsContentDownloaded)
|
assert.False(url.IsContentDownloaded)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
|
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
url := ParseAPIUrlCard(apiCard)
|
||||||
assert.Equal("www.youtube.com", url.Domain)
|
assert.Equal("www.youtube.com", url.Domain)
|
||||||
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
|
||||||
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8" +
|
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
|
||||||
"Watch this episode on Rumble: https://rumble...", url.Description)
|
"Watch this episode on Rumble: https://rumble...", url.Description)
|
||||||
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
|
||||||
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
|
||||||
assert.Equal(UserID(10228272), url.SiteID)
|
assert.Equal(UserID(10228272), url.SiteID)
|
||||||
assert.True(url.HasThumbnail)
|
assert.True(url.HasThumbnail)
|
||||||
assert.False(url.IsContentDownloaded)
|
assert.False(url.IsContentDownloaded)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
|
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
url := ParseAPIUrlCard(apiCard)
|
||||||
assert.Equal("www.youtube.com", url.Domain)
|
assert.Equal("www.youtube.com", url.Domain)
|
||||||
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
|
||||||
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________" +
|
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
|
||||||
"__________________________________________...", url.Description)
|
"__________________________________________...", url.Description)
|
||||||
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
|
||||||
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
|
||||||
assert.Equal(UserID(10228272), url.SiteID)
|
assert.Equal(UserID(10228272), url.SiteID)
|
||||||
assert.True(url.HasThumbnail)
|
assert.True(url.HasThumbnail)
|
||||||
assert.False(url.IsContentDownloaded)
|
assert.False(url.IsContentDownloaded)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
|
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apiCard APICard
|
var apiCard APICard
|
||||||
err = json.Unmarshal(data, &apiCard)
|
err = json.Unmarshal(data, &apiCard)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
url := ParseAPIUrlCard(apiCard)
|
url := ParseAPIUrlCard(apiCard)
|
||||||
assert.Equal("en.m.wikipedia.org", url.Domain)
|
assert.Equal("en.m.wikipedia.org", url.Domain)
|
||||||
assert.Equal("Entryism - Wikipedia", url.Title)
|
assert.Equal("Entryism - Wikipedia", url.Title)
|
||||||
assert.Equal("", url.Description)
|
assert.Equal("", url.Description)
|
||||||
assert.True(url.HasCard)
|
assert.True(url.HasCard)
|
||||||
assert.False(url.HasThumbnail)
|
assert.False(url.HasThumbnail)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should check if a url is a tweet url, and if so, parse it
|
* Should check if a url is a tweet url, and if so, parse it
|
||||||
*/
|
*/
|
||||||
func TestParseTweetUrl(t *testing.T) {
|
func TestParseTweetUrl(t *testing.T) {
|
||||||
assert:= assert.New(t)
|
assert := assert.New(t)
|
||||||
|
|
||||||
// Test valid tweet url
|
// Test valid tweet url
|
||||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||||
handle, id, is_ok := TryParseTweetUrl(url)
|
handle, id, is_ok := TryParseTweetUrl(url)
|
||||||
assert.True(is_ok)
|
assert.True(is_ok)
|
||||||
assert.Equal(UserHandle("kanesays23"), handle)
|
assert.Equal(UserHandle("kanesays23"), handle)
|
||||||
assert.Equal(TweetID(1429583672827465730), id)
|
assert.Equal(TweetID(1429583672827465730), id)
|
||||||
|
|
||||||
// Test url with GET params
|
// Test url with GET params
|
||||||
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||||
assert.True(is_ok)
|
assert.True(is_ok)
|
||||||
assert.Equal(UserHandle("NerdNoticing"), handle)
|
assert.Equal(UserHandle("NerdNoticing"), handle)
|
||||||
assert.Equal(TweetID(1263192389050654720), id)
|
assert.Equal(TweetID(1263192389050654720), id)
|
||||||
|
|
||||||
// Test invalid url
|
// Test invalid url
|
||||||
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||||
assert.False(is_ok)
|
assert.False(is_ok)
|
||||||
|
|
||||||
// Test empty string
|
// Test empty string
|
||||||
_, _, is_ok = TryParseTweetUrl("")
|
_, _, is_ok = TryParseTweetUrl("")
|
||||||
assert.False(is_ok)
|
assert.False(is_ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
* Should extract a user handle from a tweet URL, or fail if URL is invalid
|
||||||
*/
|
*/
|
||||||
func TestParseHandleFromTweetUrl(t *testing.T) {
|
func TestParseHandleFromTweetUrl(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
|
|
||||||
// Test valid tweet url
|
// Test valid tweet url
|
||||||
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
url := "https://twitter.com/kanesays23/status/1429583672827465730"
|
||||||
result, err := ParseHandleFromTweetUrl(url)
|
result, err := ParseHandleFromTweetUrl(url)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.Equal(UserHandle("kanesays23"), result)
|
assert.Equal(UserHandle("kanesays23"), result)
|
||||||
|
|
||||||
// Test url with GET params
|
// Test url with GET params
|
||||||
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.Equal(UserHandle("NerdNoticing"), result)
|
assert.Equal(UserHandle("NerdNoticing"), result)
|
||||||
|
|
||||||
// Test invalid url
|
// Test invalid url
|
||||||
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
|
|
||||||
// Test empty string
|
// Test empty string
|
||||||
_, err = ParseHandleFromTweetUrl("")
|
_, err = ParseHandleFromTweetUrl("")
|
||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
}
|
}
|
||||||
|
292
scraper/user.go
292
scraper/user.go
@ -1,12 +1,12 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
"path"
|
"strings"
|
||||||
|
|
||||||
"offline_twitter/terminal_utils"
|
"offline_twitter/terminal_utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
|
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
|
||||||
@ -15,47 +15,48 @@ type UserID int64
|
|||||||
type UserHandle string
|
type UserHandle string
|
||||||
|
|
||||||
func JoinArrayOfHandles(handles []UserHandle) string {
|
func JoinArrayOfHandles(handles []UserHandle) string {
|
||||||
ret := []string{}
|
ret := []string{}
|
||||||
for _, h := range handles {
|
for _, h := range handles {
|
||||||
ret = append(ret, string(h))
|
ret = append(ret, string(h))
|
||||||
}
|
}
|
||||||
return strings.Join(ret, ",")
|
return strings.Join(ret, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
// User is the parsed form of a Twitter account, as produced by ParseSingleUser (or by
// GetUnknownUserWithHandle when there is no real data to parse).
type User struct {
	ID             UserID
	DisplayName    string
	Handle         UserHandle
	Bio            string
	FollowingCount int // filled from the API's "friends" count
	FollowersCount int
	Location       string
	Website        string // expanded form of the first profile URL, if the API gave one
	JoinDate       Timestamp
	IsPrivate      bool
	IsVerified     bool
	IsBanned       bool
	IsDeleted      bool

	// Remote URLs and the local filenames computed from them.  The profile image URL is stored
	// without the "_normal" size marker; see GetTinyProfileImageUrl for the small variant.
	ProfileImageUrl       string
	ProfileImageLocalPath string
	BannerImageUrl        string
	BannerImageLocalPath  string

	PinnedTweetID TweetID
	PinnedTweet   *Tweet // may be nil even when PinnedTweetID is set

	IsFollowed          bool
	IsContentDownloaded bool
	IsNeedingFakeID     bool // set on placeholder users from GetUnknownUserWithHandle
	IsIdFake            bool // set on placeholder users from GetUnknownUserWithHandle
}
|
||||||
|
|
||||||
func (u User) String() string {
|
func (u User) String() string {
|
||||||
var verified string
|
var verified string
|
||||||
if u.IsVerified {
|
if u.IsVerified {
|
||||||
verified = "[\u2713]"
|
verified = "[\u2713]"
|
||||||
}
|
}
|
||||||
ret := fmt.Sprintf(
|
ret := fmt.Sprintf(
|
||||||
`%s%s
|
`%s%s
|
||||||
@%s
|
@%s
|
||||||
%s
|
%s
|
||||||
|
|
||||||
@ -65,115 +66,112 @@ Joined %s
|
|||||||
%s
|
%s
|
||||||
%s
|
%s
|
||||||
`,
|
`,
|
||||||
u.DisplayName,
|
u.DisplayName,
|
||||||
verified,
|
verified,
|
||||||
u.Handle,
|
u.Handle,
|
||||||
terminal_utils.WrapText(u.Bio, 60),
|
terminal_utils.WrapText(u.Bio, 60),
|
||||||
u.FollowingCount,
|
u.FollowingCount,
|
||||||
u.FollowersCount,
|
u.FollowersCount,
|
||||||
terminal_utils.FormatDate(u.JoinDate.Time),
|
terminal_utils.FormatDate(u.JoinDate.Time),
|
||||||
u.Location,
|
u.Location,
|
||||||
u.Website,
|
u.Website,
|
||||||
)
|
)
|
||||||
if u.PinnedTweet != nil {
|
if u.PinnedTweet != nil {
|
||||||
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
|
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
|
||||||
} else {
|
} else {
|
||||||
println("Pinned tweet id:", u.PinnedTweetID)
|
println("Pinned tweet id:", u.PinnedTweetID)
|
||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user`
|
* Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user`
|
||||||
* subcommand or as part of tombstone user fetching.)
|
* subcommand or as part of tombstone user fetching.)
|
||||||
*/
|
*/
|
||||||
func GetUnknownUserWithHandle(handle UserHandle) User {
|
func GetUnknownUserWithHandle(handle UserHandle) User {
|
||||||
return User{
|
return User{
|
||||||
ID: UserID(0), // 2^62 + 1...
|
ID: UserID(0), // 2^62 + 1...
|
||||||
DisplayName: string(handle),
|
DisplayName: string(handle),
|
||||||
Handle: handle,
|
Handle: handle,
|
||||||
Bio: "<blank>",
|
Bio: "<blank>",
|
||||||
FollowersCount: 0,
|
FollowersCount: 0,
|
||||||
FollowingCount: 0,
|
FollowingCount: 0,
|
||||||
Location: "<blank>",
|
Location: "<blank>",
|
||||||
Website:"<blank>",
|
Website: "<blank>",
|
||||||
JoinDate: TimestampFromUnix(0),
|
JoinDate: TimestampFromUnix(0),
|
||||||
IsVerified: false,
|
IsVerified: false,
|
||||||
IsPrivate: false,
|
IsPrivate: false,
|
||||||
IsNeedingFakeID: true,
|
IsNeedingFakeID: true,
|
||||||
IsIdFake: true,
|
IsIdFake: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||||
if apiUser.DoesntExist {
|
if apiUser.DoesntExist {
|
||||||
// User may have been deleted, or there was a typo. There's no data to parse
|
// User may have been deleted, or there was a typo. There's no data to parse
|
||||||
if apiUser.ScreenName == "" {
|
if apiUser.ScreenName == "" {
|
||||||
panic("ScreenName is empty!")
|
panic("ScreenName is empty!")
|
||||||
}
|
}
|
||||||
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ret.ID = UserID(apiUser.ID)
|
ret.ID = UserID(apiUser.ID)
|
||||||
ret.Handle = UserHandle(apiUser.ScreenName)
|
ret.Handle = UserHandle(apiUser.ScreenName)
|
||||||
if apiUser.IsBanned {
|
if apiUser.IsBanned {
|
||||||
// Banned users won't have any further info, so just return here
|
// Banned users won't have any further info, so just return here
|
||||||
ret.IsBanned = true
|
ret.IsBanned = true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ret.DisplayName = apiUser.Name
|
ret.DisplayName = apiUser.Name
|
||||||
ret.Bio = apiUser.Description
|
ret.Bio = apiUser.Description
|
||||||
ret.FollowingCount = apiUser.FriendsCount
|
ret.FollowingCount = apiUser.FriendsCount
|
||||||
ret.FollowersCount = apiUser.FollowersCount
|
ret.FollowersCount = apiUser.FollowersCount
|
||||||
ret.Location = apiUser.Location
|
ret.Location = apiUser.Location
|
||||||
if len(apiUser.Entities.URL.Urls) > 0 {
|
if len(apiUser.Entities.URL.Urls) > 0 {
|
||||||
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
||||||
}
|
}
|
||||||
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ret.IsPrivate = apiUser.Protected
|
ret.IsPrivate = apiUser.Protected
|
||||||
ret.IsVerified = apiUser.Verified
|
ret.IsVerified = apiUser.Verified
|
||||||
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
||||||
|
|
||||||
|
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
||||||
|
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
||||||
|
}
|
||||||
|
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
||||||
|
|
||||||
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
||||||
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
||||||
}
|
|
||||||
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
|
||||||
|
|
||||||
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
||||||
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
||||||
|
}
|
||||||
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
return
|
||||||
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calls API#GetUser and returns the parsed result
|
// Calls API#GetUser and returns the parsed result
|
||||||
func GetUser(handle UserHandle) (User, error) {
|
func GetUser(handle UserHandle) (User, error) {
|
||||||
api := API{}
|
api := API{}
|
||||||
apiUser, err := api.GetUser(handle)
|
apiUser, err := api.GetUser(handle)
|
||||||
if apiUser.ScreenName == "" {
|
if apiUser.ScreenName == "" {
|
||||||
apiUser.ScreenName = string(handle)
|
apiUser.ScreenName = string(handle)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return User{}, err
|
return User{}, err
|
||||||
}
|
}
|
||||||
return ParseSingleUser(apiUser)
|
return ParseSingleUser(apiUser)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make a filename for the profile image, that hopefully won't clobber other ones
|
* Make a filename for the profile image, that hopefully won't clobber other ones
|
||||||
*/
|
*/
|
||||||
func (u User) compute_profile_image_local_path() string {
|
func (u User) compute_profile_image_local_path() string {
|
||||||
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
|
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -182,34 +180,34 @@ func (u User) compute_profile_image_local_path() string {
|
|||||||
* If there is no banner image, just return nothing.
|
* If there is no banner image, just return nothing.
|
||||||
*/
|
*/
|
||||||
func (u User) compute_banner_image_local_path() string {
|
func (u User) compute_banner_image_local_path() string {
|
||||||
if u.BannerImageUrl == "" {
|
if u.BannerImageUrl == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
base_name := path.Base(u.BannerImageUrl)
|
base_name := path.Base(u.BannerImageUrl)
|
||||||
|
|
||||||
// Check if it has an extension (e.g., ".png" or ".jpeg")
|
// Check if it has an extension (e.g., ".png" or ".jpeg")
|
||||||
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
|
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
|
||||||
// If it doesn't have an extension, add one
|
// If it doesn't have an extension, add one
|
||||||
base_name += ".jpg"
|
base_name += ".jpg"
|
||||||
}
|
}
|
||||||
return string(u.Handle) + "_banner_" + base_name
|
return string(u.Handle) + "_banner_" + base_name
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the URL where we would expect to find a User's tiny profile image
|
* Get the URL where we would expect to find a User's tiny profile image
|
||||||
*/
|
*/
|
||||||
func (u User) GetTinyProfileImageUrl() string {
|
func (u User) GetTinyProfileImageUrl() string {
|
||||||
// If profile image is empty, then just use the default profile image
|
// If profile image is empty, then just use the default profile image
|
||||||
if u.ProfileImageUrl == "" {
|
if u.ProfileImageUrl == "" {
|
||||||
return DEFAULT_PROFILE_IMAGE_URL
|
return DEFAULT_PROFILE_IMAGE_URL
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that the format is as expected
|
// Check that the format is as expected
|
||||||
r := regexp.MustCompile(`(\.\w{2,4})$`)
|
r := regexp.MustCompile(`(\.\w{2,4})$`)
|
||||||
if !r.MatchString(u.ProfileImageUrl) {
|
if !r.MatchString(u.ProfileImageUrl) {
|
||||||
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
|
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
|
||||||
}
|
}
|
||||||
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
|
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -217,8 +215,8 @@ func (u User) GetTinyProfileImageUrl() string {
|
|||||||
* If user has a blank or default profile image, return a non-personalized default path.
|
* If user has a blank or default profile image, return a non-personalized default path.
|
||||||
*/
|
*/
|
||||||
func (u User) GetTinyProfileImageLocalPath() string {
|
func (u User) GetTinyProfileImageLocalPath() string {
|
||||||
if u.ProfileImageUrl == "" {
|
if u.ProfileImageUrl == "" {
|
||||||
return path.Base(u.GetTinyProfileImageUrl())
|
return path.Base(u.GetTinyProfileImageUrl())
|
||||||
}
|
}
|
||||||
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
|
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -33,7 +33,6 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
|
|||||||
return ParseTweetResponse(tweet_response)
|
return ParseTweetResponse(tweet_response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
||||||
api := API{}
|
api := API{}
|
||||||
api_response, err := api.GetGraphqlFeedFor(user_id, "")
|
api_response, err := api.GetGraphqlFeedFor(user_id, "")
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"os"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
"github.com/jarcoal/httpmock"
|
"github.com/jarcoal/httpmock"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -31,7 +31,7 @@ func TestParseSingleUser(t *testing.T) {
|
|||||||
assert.Equal(UserID(44067298), user.ID)
|
assert.Equal(UserID(44067298), user.ID)
|
||||||
assert.Equal("Michael Malice", user.DisplayName)
|
assert.Equal("Michael Malice", user.DisplayName)
|
||||||
assert.Equal(UserHandle("michaelmalice"), user.Handle)
|
assert.Equal(UserHandle("michaelmalice"), user.Handle)
|
||||||
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by " +
|
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+
|
||||||
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
|
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
|
||||||
assert.Equal(941, user.FollowingCount)
|
assert.Equal(941, user.FollowingCount)
|
||||||
assert.Equal(208589, user.FollowersCount)
|
assert.Equal(208589, user.FollowersCount)
|
||||||
@ -39,7 +39,7 @@ func TestParseSingleUser(t *testing.T) {
|
|||||||
assert.Equal("https://amzn.to/3oInafv", user.Website)
|
assert.Equal("https://amzn.to/3oInafv", user.Website)
|
||||||
assert.Equal(int64(1243920952), user.JoinDate.Unix())
|
assert.Equal(int64(1243920952), user.JoinDate.Unix())
|
||||||
assert.False(user.IsPrivate)
|
assert.False(user.IsPrivate)
|
||||||
assert.True (user.IsVerified)
|
assert.True(user.IsVerified)
|
||||||
assert.False(user.IsBanned)
|
assert.False(user.IsBanned)
|
||||||
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
|
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
|
||||||
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
|
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
|
||||||
@ -90,7 +90,7 @@ func TestParseDeletedUser(t *testing.T) {
|
|||||||
handle := "Some Random Deleted User"
|
handle := "Some Random Deleted User"
|
||||||
|
|
||||||
apiUser := user_resp.ConvertToAPIUser()
|
apiUser := user_resp.ConvertToAPIUser()
|
||||||
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
|
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
|
||||||
|
|
||||||
user, err := ParseSingleUser(apiUser)
|
user, err := ParseSingleUser(apiUser)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"path"
|
||||||
"path"
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
type VideoID int64
|
type VideoID int64
|
||||||
@ -12,61 +12,61 @@ type VideoID int64
|
|||||||
// from someone else).
|
// from someone else).
|
||||||
|
|
||||||
// Video is the parsed form of a video (or animated gif) attached to a tweet, as built by
// ParseAPIVideo.
type Video struct {
	ID            VideoID
	TweetID       TweetID
	Width         int
	Height        int
	RemoteURL     string // URL of the first variant after sorting
	LocalFilename string // "<tweet id>.mp4"

	ThumbnailRemoteUrl string
	ThumbnailLocalPath string `db:"thumbnail_local_filename"` // basename of ThumbnailRemoteUrl
	Duration           int    // milliseconds
	ViewCount          int

	IsDownloaded bool // always false straight out of ParseAPIVideo
	IsGif        bool // true when the API media type is "animated_gif"
}
|
||||||
|
|
||||||
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
|
||||||
variants := apiVideo.VideoInfo.Variants
|
variants := apiVideo.VideoInfo.Variants
|
||||||
sort.Sort(variants)
|
sort.Sort(variants)
|
||||||
|
|
||||||
var view_count int
|
var view_count int
|
||||||
|
|
||||||
r := apiVideo.Ext.MediaStats.R
|
r := apiVideo.Ext.MediaStats.R
|
||||||
|
|
||||||
switch r.(type) {
|
switch r.(type) {
|
||||||
case string:
|
case string:
|
||||||
view_count = 0
|
view_count = 0
|
||||||
case map[string]interface{}:
|
case map[string]interface{}:
|
||||||
OK_entry, ok := r.(map[string]interface{})["ok"]
|
OK_entry, ok := r.(map[string]interface{})["ok"]
|
||||||
if !ok {
|
if !ok {
|
||||||
panic("No 'ok' value found in the R!")
|
panic("No 'ok' value found in the R!")
|
||||||
}
|
}
|
||||||
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
||||||
view_count = int_or_panic(view_count_str.(string))
|
view_count = int_or_panic(view_count_str.(string))
|
||||||
if !ok {
|
if !ok {
|
||||||
panic("No 'viewCount' value found in the OK!")
|
panic("No 'viewCount' value found in the OK!")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
|
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
|
||||||
|
|
||||||
return Video{
|
return Video{
|
||||||
ID: VideoID(apiVideo.ID),
|
ID: VideoID(apiVideo.ID),
|
||||||
TweetID: tweet_id,
|
TweetID: tweet_id,
|
||||||
Width: apiVideo.OriginalInfo.Width,
|
Width: apiVideo.OriginalInfo.Width,
|
||||||
Height: apiVideo.OriginalInfo.Height,
|
Height: apiVideo.OriginalInfo.Height,
|
||||||
RemoteURL: variants[0].URL,
|
RemoteURL: variants[0].URL,
|
||||||
LocalFilename: local_filename,
|
LocalFilename: local_filename,
|
||||||
|
|
||||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||||
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
|
||||||
Duration: apiVideo.VideoInfo.Duration,
|
Duration: apiVideo.VideoInfo.Duration,
|
||||||
ViewCount: view_count,
|
ViewCount: view_count,
|
||||||
|
|
||||||
IsDownloaded: false,
|
IsDownloaded: false,
|
||||||
IsGif: apiVideo.Type == "animated_gif",
|
IsGif: apiVideo.Type == "animated_gif",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,37 +1,37 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
"encoding/json"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
. "offline_twitter/scraper"
|
. "offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseAPIVideo(t *testing.T) {
|
func TestParseAPIVideo(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
data, err := os.ReadFile("test_responses/tweet_content/video.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var apivideo APIExtendedMedia
|
var apivideo APIExtendedMedia
|
||||||
err = json.Unmarshal(data, &apivideo)
|
err = json.Unmarshal(data, &apivideo)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
tweet_id := TweetID(28)
|
tweet_id := TweetID(28)
|
||||||
video := ParseAPIVideo(apivideo, tweet_id)
|
video := ParseAPIVideo(apivideo, tweet_id)
|
||||||
assert.Equal(VideoID(1418951950020845568), video.ID)
|
assert.Equal(VideoID(1418951950020845568), video.ID)
|
||||||
assert.Equal(tweet_id, video.TweetID)
|
assert.Equal(tweet_id, video.TweetID)
|
||||||
assert.Equal(1280, video.Height)
|
assert.Equal(1280, video.Height)
|
||||||
assert.Equal(720, video.Width)
|
assert.Equal(720, video.Width)
|
||||||
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
|
||||||
assert.Equal("28.mp4", video.LocalFilename)
|
assert.Equal("28.mp4", video.LocalFilename)
|
||||||
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
|
||||||
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
|
||||||
assert.Equal(275952, video.ViewCount)
|
assert.Equal(275952, video.ViewCount)
|
||||||
assert.Equal(88300, video.Duration)
|
assert.Equal(88300, video.Duration)
|
||||||
assert.False(video.IsDownloaded)
|
assert.False(video.IsDownloaded)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user