Add 'gofmt' linter

This commit is contained in:
Alessio 2022-03-13 17:09:43 -07:00
parent 223734d001
commit d1d80a91cd
30 changed files with 714 additions and 733 deletions

View File

@ -27,6 +27,7 @@ linters:
- wrapcheck
- lll
- godox
- gofmt
- errorlint
- nolintlint
@ -203,9 +204,9 @@ linters-settings:
keywords: # default keywords are TODO, BUG, and FIXME, these can be overwritten by this setting
- XXX
# gofmt:
# # simplify code: gofmt with `-s` option, true by default
# simplify: true
gofmt:
# simplify code: gofmt with `-s` option, true by default
simplify: true
# gofumpt:
# # Select the Go version to target. The default is `1.15`.

View File

@ -6,9 +6,9 @@ import (
"fmt"
"math/rand"
"github.com/go-test/deep"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/go-test/deep"
"offline_twitter/scraper"
)
@ -76,7 +76,7 @@ func TestModifyUser(t *testing.T) {
fake_user.FollowersCount = 2000
fake_user.JoinDate = scraper.TimestampFromUnix(2000)
fake_user.ProfileImageUrl = "asdf2"
fake_user.IsContentDownloaded = false // test No Worsening
fake_user.IsContentDownloaded = false // test No Worsening
// Save the modified user
err = profile.SaveUser(&fake_user)

View File

@ -3,9 +3,9 @@ package persistence
import (
"errors"
"fmt"
"os"
"regexp"
"strings"
"os"
)
var NotInDatabase = errors.New("Not in database")
@ -35,7 +35,7 @@ func file_exists(path string) bool {
* https://stackoverflow.com/questions/56616196/how-to-convert-camel-case-string-to-snake-case#56616250
*/
func ToSnakeCase(str string) string {
snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}")
snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake)
snake := regexp.MustCompile("(.)_?([A-Z][a-z]+)").ReplaceAllString(str, "${1}_${2}")
snake = regexp.MustCompile("([a-z0-9])_?([A-Z])").ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake)
}

View File

@ -70,13 +70,13 @@ func create_stable_user() scraper.User {
func create_image_from_id(id int) scraper.Image {
filename := fmt.Sprintf("image%d.jpg", id)
return scraper.Image{
ID: scraper.ImageID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
ID: scraper.ImageID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
IsDownloaded: false,
IsDownloaded: false,
}
}
@ -86,18 +86,18 @@ func create_image_from_id(id int) scraper.Image {
func create_video_from_id(id int) scraper.Video {
filename := fmt.Sprintf("video%d.jpg", id)
return scraper.Video{
ID: scraper.VideoID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
ID: scraper.VideoID(id),
TweetID: -1,
Width: id * 10,
Height: id * 5,
RemoteURL: filename,
LocalFilename: filename,
ThumbnailRemoteUrl: filename,
ThumbnailLocalPath: filename,
Duration: 10000,
ViewCount: 200,
IsDownloaded: false,
IsGif: false,
Duration: 10000,
ViewCount: 200,
IsDownloaded: false,
IsGif: false,
}
}
@ -265,13 +265,13 @@ func create_dummy_tombstone() scraper.Tweet {
tweet_id := scraper.TweetID(rand.Int())
return scraper.Tweet{
ID: tweet_id,
UserID: -1,
ID: tweet_id,
UserID: -1,
TombstoneType: "deleted",
IsStub: true,
Mentions: []scraper.UserHandle{},
IsStub: true,
Mentions: []scraper.UserHandle{},
ReplyMentions: []scraper.UserHandle{},
Hashtags: []string{},
Hashtags: []string{},
}
}

View File

@ -7,29 +7,27 @@ import (
"offline_twitter/terminal_utils"
)
const ENGINE_DATABASE_VERSION = 11
type VersionMismatchError struct {
EngineVersion int
EngineVersion int
DatabaseVersion int
}
func (e VersionMismatchError) Error() string {
return fmt.Sprintf(
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
`This profile was created with database schema version %d, which is newer than this application's database schema version, %d.
Please upgrade this application to a newer version to use this profile. Or downgrade the profile's schema version, somehow.`,
e.DatabaseVersion, e.EngineVersion,
e.DatabaseVersion, e.EngineVersion,
)
}
/**
* The Nth entry is the migration that moves you from version N to version N+1.
* `len(MIGRATIONS)` should always equal `ENGINE_DATABASE_VERSION`.
*/
var MIGRATIONS = []string{
`create table polls (rowid integer primary key,
`create table polls (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,
num_choices integer not null,
@ -50,25 +48,25 @@ var MIGRATIONS = []string{
foreign key(tweet_id) references tweets(id)
);`,
`alter table tweets add column is_conversation_scraped boolean default 0;
`alter table tweets add column is_conversation_scraped boolean default 0;
alter table tweets add column last_scraped_at integer not null default 0`,
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
`update tombstone_types set tombstone_text = 'This Tweet is from a suspended account' where rowid = 2;
insert into tombstone_types (rowid, short_name, tombstone_text)
values (5, 'violated', 'This Tweet violated the Twitter Rules'),
(6, 'no longer exists', 'This Tweet is from an account that no longer exists')`,
`alter table videos add column thumbnail_remote_url text not null default "missing";
`alter table videos add column thumbnail_remote_url text not null default "missing";
alter table videos add column thumbnail_local_filename text not null default "missing"`,
`alter table videos add column duration integer not null default 0;
`alter table videos add column duration integer not null default 0;
alter table videos add column view_count integer not null default 0`,
`alter table users add column is_banned boolean default 0`,
`alter table urls add column short_text text not null default ""`,
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
`alter table users add column is_banned boolean default 0`,
`alter table urls add column short_text text not null default ""`,
`insert into tombstone_types (rowid, short_name, tombstone_text) values (7, 'age-restricted', 'Age-restricted adult content. '
|| 'This content might not be appropriate for people under 18 years old. To view this media, you’ll need to log in to Twitter')`,
`alter table users add column is_followed boolean default 0`,
`create table fake_user_sequence(latest_fake_id integer not null);
`alter table users add column is_followed boolean default 0`,
`create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);
alter table users add column is_id_fake boolean default 0;`,
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
`delete from urls where rowid in (select urls.rowid from tweets join urls on tweets.id = urls.tweet_id where urls.text like
'https://twitter.com/%/status/' || tweets.quoted_tweet_id || "%")`,
}

View File

@ -2,12 +2,13 @@ package persistence_test
import (
"testing"
"os"
"github.com/stretchr/testify/require"
"offline_twitter/scraper"
"offline_twitter/persistence"
"offline_twitter/scraper"
)
func TestVersionUpgrade(t *testing.T) {
@ -25,7 +26,7 @@ func TestVersionUpgrade(t *testing.T) {
require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
persistence.MIGRATIONS = append(persistence.MIGRATIONS, test_migration)
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION + 1)
err := profile.UpgradeFromXToY(persistence.ENGINE_DATABASE_VERSION, persistence.ENGINE_DATABASE_VERSION+1)
require.NoError(err)
require.True(profile.IsTweetInDatabase(test_tweet_id), "Migration should have created the tweet, but it didn't")

View File

@ -5,8 +5,8 @@ import (
)
var (
END_OF_FEED = fmt.Errorf("End of feed")
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
END_OF_FEED = fmt.Errorf("End of feed")
DOESNT_EXIST = fmt.Errorf("Doesn't exist")
EXTERNAL_API_ERROR = fmt.Errorf("Unexpected result from external API")
API_PARSE_ERROR = fmt.Errorf("Couldn't parse the result returned from the API")
)

View File

@ -1,33 +1,33 @@
package scraper
import (
"encoding/json"
"fmt"
"html"
"time"
"strings"
"encoding/json"
"strconv"
"sort"
"strconv"
"strings"
"time"
)
type APIMedia struct {
ID int64 `json:"id_str,string"`
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
URL string `json:"url"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
}
type SortableVariants []struct {
Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"`
Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"`
}
func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Len() int { return len(v) }
func (v SortableVariants) Swap(i, j int) { v[i], v[j] = v[j], v[i] }
func (v SortableVariants) Less(i, j int) bool { return v[i].Bitrate > v[j].Bitrate }
type APIExtendedMedia struct {
@ -35,12 +35,12 @@ type APIExtendedMedia struct {
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
VideoInfo struct {
Variants SortableVariants `json:"variants"`
Duration int `json:"duration_millis"`
Variants SortableVariants `json:"variants"`
Duration int `json:"duration_millis"`
} `json:"video_info"`
OriginalInfo struct {
Width int `json:"width"`
Height int `json:"height"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"original_info"`
Ext struct {
MediaStats struct {
@ -74,9 +74,9 @@ type APICard struct {
} `json:"description"`
Thumbnail struct {
ImageValue struct {
Url string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
Url string `json:"url"`
Width int `json:"width"`
Height int `json:"height"`
} `json:"image_value"`
} `json:"thumbnail_image_large"`
PlayerImage struct {
@ -128,18 +128,18 @@ type APICard struct {
}
type APITweet struct {
ID int64 `json:"id_str,string"`
ConversationID int64 `json:"conversation_id_str,string"`
CreatedAt string `json:"created_at"`
FavoriteCount int `json:"favorite_count"`
FullText string `json:"full_text"`
DisplayTextRange []int `json:"display_text_range"`
Entities struct {
ID int64 `json:"id_str,string"`
ConversationID int64 `json:"conversation_id_str,string"`
CreatedAt string `json:"created_at"`
FavoriteCount int `json:"favorite_count"`
FullText string `json:"full_text"`
DisplayTextRange []int `json:"display_text_range"`
Entities struct {
Hashtags []struct {
Text string `json:"text"`
} `json:"hashtags"`
Media []APIMedia `json:"media"`
URLs []struct {
URLs []struct {
ExpandedURL string `json:"expanded_url"`
ShortenedUrl string `json:"url"`
} `json:"urls"`
@ -147,30 +147,30 @@ type APITweet struct {
UserName string `json:"screen_name"`
UserID int64 `json:"id_str,string"`
} `json:"user_mentions"`
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
ReplyMentions string // The leading part of the text which is cut off by "DisplayTextRange"
} `json:"entities"`
ExtendedEntities struct {
Media []APIExtendedMedia `json:"media"`
} `json:"extended_entities"`
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
ReplyCount int `json:"reply_count"`
RetweetCount int `json:"retweet_count"`
QuoteCount int `json:"quote_count"`
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
ReplyCount int `json:"reply_count"`
RetweetCount int `json:"retweet_count"`
QuoteCount int `json:"quote_count"`
RetweetedStatusIDStr string `json:"retweeted_status_id_str"` // Can be empty string
RetweetedStatusID int64
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
QuotedStatusID int64
QuotedStatusPermalink struct {
ShortURL string `json:"url"`
ExpandedURL string `json:"expanded"`
} `json:"quoted_status_permalink"`
Time time.Time `json:"time"`
UserID int64 `json:"user_id_str,string"`
UserHandle string
Card APICard `json:"card"`
TombstoneText string
Time time.Time `json:"time"`
UserID int64 `json:"user_id_str,string"`
UserHandle string
Card APICard `json:"card"`
TombstoneText string
}
func (t *APITweet) NormalizeContent() {
@ -183,7 +183,7 @@ func (t *APITweet) NormalizeContent() {
t.RetweetedStatusID = int64(id)
}
if (len(t.DisplayTextRange) == 2) {
if len(t.DisplayTextRange) == 2 {
t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
}
@ -217,7 +217,6 @@ func (t APITweet) String() string {
return string(data)
}
type APIUser struct {
CreatedAt string `json:"created_at"`
Description string `json:"description"`
@ -235,7 +234,7 @@ type APIUser struct {
ListedCount int `json:"listed_count"`
Name string `json:"name"`
Location string `json:"location"`
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` // Dunno how to type-convert an array
ProfileBannerURL string `json:"profile_banner_url"`
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
Protected bool `json:"protected"`
@ -246,7 +245,6 @@ type APIUser struct {
DoesntExist bool
}
type UserResponse struct {
Data struct {
User struct {
@ -255,11 +253,12 @@ type UserResponse struct {
} `json:"user"`
} `json:"data"`
Errors []struct {
Message string `json:"message"`
Name string `json:"name"`
Code int `json:"code"`
Message string `json:"message"`
Name string `json:"name"`
Code int `json:"code"`
} `json:"errors"`
}
func (u UserResponse) ConvertToAPIUser() APIUser {
ret := u.Data.User.Legacy
ret.ID = u.Data.User.ID
@ -279,9 +278,9 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
}
type Entry struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
Content struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
Content struct {
Item struct {
Content struct {
Tombstone struct {
@ -303,12 +302,15 @@ type Entry struct {
} `json:"operation"`
} `json:"content"`
}
func (e Entry) GetTombstoneText() string {
return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text
}
type SortableEntries []Entry
func (e SortableEntries) Len() int { return len(e) }
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e SortableEntries) Len() int { return len(e) }
func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex }
type TweetResponse struct {
@ -329,15 +331,16 @@ type TweetResponse struct {
}
var tombstone_types = map[string]string{
"This Tweet was deleted by the Tweet author. Learn more": "deleted",
"This Tweet is from a suspended account. Learn more": "suspended",
"This Tweet was deleted by the Tweet author. Learn more": "deleted",
"This Tweet is from a suspended account. Learn more": "suspended",
"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more": "hidden",
"This Tweet is unavailable. Learn more": "unavailable",
"This Tweet violated the Twitter Rules. Learn more": "violated",
"This Tweet is from an account that no longer exists. Learn more": "no longer exists",
"This Tweet is unavailable. Learn more": "unavailable",
"This Tweet violated the Twitter Rules. Learn more": "violated",
"This Tweet is from an account that no longer exists. Learn more": "no longer exists",
"Age-restricted adult content. This content might not be appropriate for people under 18 years old. To view this media, " +
"you’ll need to log in to Twitter. Learn more": "age-restricted",
}
/**
* Insert tweets into GlobalObjects for each tombstone. Returns a list of users that need to
* be fetched for tombstones.
@ -379,8 +382,8 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
if entry.GetTombstoneText() != "" {
// Try to reconstruct the tombstone tweet
var tombstoned_tweet APITweet
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
if i + 1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
tombstoned_tweet.ID = int64(i) // Set a default to prevent clobbering other tombstones
if i+1 < len(entries) && entries[i+1].Content.Item.Content.Tweet.ID != 0 {
next_tweet_id := entries[i+1].Content.Item.Content.Tweet.ID
api_tweet, ok := t.GlobalObjects.Tweets[fmt.Sprint(next_tweet_id)]
if !ok {
@ -390,7 +393,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
tombstoned_tweet.UserID = api_tweet.InReplyToUserID
ret = append(ret, UserHandle(api_tweet.InReplyToScreenName))
}
if i - 1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
if i-1 >= 0 && entries[i-1].Content.Item.Content.Tweet.ID != 0 {
prev_tweet_id := entries[i-1].Content.Item.Content.Tweet.ID
_, ok := t.GlobalObjects.Tweets[fmt.Sprint(prev_tweet_id)]
if !ok {
@ -416,7 +419,7 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
func (t *TweetResponse) GetCursor() string {
entries := t.Timeline.Instructions[0].AddEntries.Entries
if len(entries) > 0 {
last_entry := entries[len(entries) - 1]
last_entry := entries[len(entries)-1]
if strings.Contains(last_entry.EntryID, "cursor") {
return last_entry.Content.Operation.Cursor.Value
}
@ -424,7 +427,7 @@ func (t *TweetResponse) GetCursor() string {
// Next, try the other format ("replaceEntry")
instructions := t.Timeline.Instructions
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
last_replace_entry := instructions[len(instructions)-1].ReplaceEntry.Entry
if strings.Contains(last_replace_entry.EntryID, "cursor") {
return last_replace_entry.Content.Operation.Cursor.Value
}
@ -450,7 +453,6 @@ func (t *TweetResponse) IsEndOfFeed() bool {
return true
}
func idstr_to_int(idstr string) int64 {
id, err := strconv.Atoi(idstr)
if err != nil {

View File

@ -1,9 +1,9 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -11,17 +11,16 @@ import (
. "offline_twitter/scraper"
)
func TestNormalizeContent(t *testing.T) {
assert := assert.New(t)
test_cases := []struct {
filename string
eventual_full_text string
quoted_status_id TweetID
in_reply_to_id TweetID
filename string
eventual_full_text string
quoted_status_id TweetID
in_reply_to_id TweetID
retweeted_status_id TweetID
reply_mentions string
} {
reply_mentions string
}{
{"test_responses/single_tweets/tweet_that_is_a_reply_with_gif.json", "", 0, 1395882872729477131, 0, "@michaelmalice"},
{"test_responses/single_tweets/tweet_with_image.json", "this saddens me every time", 0, 0, 0, ""},
{"test_responses/single_tweets/tweet_that_is_a_reply.json", "Noted", 0, 1396194494710788100, 0, "@RvaTeddy @michaelmalice"},
@ -48,7 +47,7 @@ func TestNormalizeContent(t *testing.T) {
}
var tweet APITweet
err = json.Unmarshal(data, &tweet)
assert.NoError(err, "Failed at " + v.filename)
assert.NoError(err, "Failed at "+v.filename)
tweet.NormalizeContent()
@ -60,7 +59,6 @@ func TestNormalizeContent(t *testing.T) {
}
}
func TestUserProfileToAPIUser(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_user_profile.json")
@ -76,7 +74,6 @@ func TestUserProfileToAPIUser(t *testing.T) {
assert.Equal(user_resp.Data.User.Legacy.FollowersCount, result.FollowersCount)
}
func TestGetCursor(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/midriffs_anarchist_cookbook.json")
@ -91,13 +88,12 @@ func TestGetCursor(t *testing.T) {
tweet_resp.GetCursor())
}
func TestIsEndOfFeed(t *testing.T) {
assert := assert.New(t)
test_cases := []struct {
filename string
filename string
is_end_of_feed bool
} {
}{
{"test_responses/michael_malice_feed.json", false},
{"test_responses/kwiber_end_of_feed.json", true},
}
@ -113,7 +109,6 @@ func TestIsEndOfFeed(t *testing.T) {
}
}
func TestHandleTombstonesHidden(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")

View File

@ -13,13 +13,13 @@ import (
)
type CardValue struct {
Type string `json:"type"`
Type string `json:"type"`
StringValue string `json:"string_value"`
ImageValue struct {
ImageValue struct {
AltText string `json:"alt"`
Height int `json:"height"`
Width int `json:"width"`
Url string `json:"url"`
Height int `json:"height"`
Width int `json:"width"`
Url string `json:"url"`
} `json:"image_value"`
UserValue struct {
ID int64 `json:"id_str,string"`
@ -30,13 +30,14 @@ type CardValue struct {
type APIV2Card struct {
Legacy struct {
BindingValues []struct {
Key string `json:"key"`
Key string `json:"key"`
Value CardValue `json:"value"`
} `json:"binding_values"`
Name string `json:"name"`
Url string `json:"url"`
Url string `json:"url"`
} `json:"legacy"`
}
func (card APIV2Card) ParseAsUrl() Url {
values := make(map[string]CardValue)
for _, obj := range card.Legacy.BindingValues {
@ -121,6 +122,7 @@ type APIV2UserResult struct {
} `json:"result"`
} `json:"user_results"`
}
func (u APIV2UserResult) ToUser() User {
user, err := ParseSingleUser(u.UserResults.Result.Legacy)
if err != nil {
@ -131,16 +133,16 @@ func (u APIV2UserResult) ToUser() User {
}
type _Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIV2Tweet `json:"legacy"`
ID int64 `json:"rest_id,string"`
Legacy APIV2Tweet `json:"legacy"`
Tombstone *struct {
Text struct {
Text string `json:"text"`
} `json:"text"`
} `json:"tombstone"`
Core *APIV2UserResult `json:"core"`
Card APIV2Card `json:"card"`
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
Core *APIV2UserResult `json:"core"`
Card APIV2Card `json:"card"`
QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
}
type APIV2Result struct {
@ -149,11 +151,12 @@ type APIV2Result struct {
Tweet _Result `json:"tweet"`
} `json:"result"`
}
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
ret := NewTweetTrove()
// Start by checking if this is a null entry in a feed
if api_result.Result.Tombstone != nil && ignore_null_entries{
if api_result.Result.Tombstone != nil && ignore_null_entries {
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
return ret
}
@ -221,7 +224,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
continue
}
found = true
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
main_tweet.Urls[i] = url
}
if !found {
@ -245,6 +248,7 @@ type APIV2Tweet struct {
RetweetedStatusResult *APIV2Result `json:"retweeted_status_result"`
APITweet
}
func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
ret := NewTweetTrove()
@ -253,7 +257,6 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
ret.MergeWith(orig_tweet_trove)
retweet := Retweet{}
var err error
retweet.RetweetID = TweetID(api_v2_tweet.ID)
@ -277,25 +280,24 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
}
type APIV2Entry struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
Content struct {
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
Content struct {
ItemContent struct {
EntryType string `json:"entryType"`
EntryType string `json:"entryType"`
TweetResults APIV2Result `json:"tweet_results"`
} `json:"itemContent"`
// Cursors
EntryType string `json:"entryType"`
Value string `json:"value"`
EntryType string `json:"entryType"`
Value string `json:"value"`
CursorType string `json:"cursorType"`
} `json:"content"`
}
type APIV2Instruction struct {
Type string `json:"type"`
Entries []APIV2Entry`json:"entries"`
Type string `json:"type"`
Entries []APIV2Entry `json:"entries"`
}
type APIV2Response struct {
@ -324,7 +326,7 @@ func (api_response APIV2Response) GetMainInstruction() *APIV2Instruction {
func (api_response APIV2Response) GetCursorBottom() string {
entries := api_response.GetMainInstruction().Entries
last_entry := entries[len(entries) - 1]
last_entry := entries[len(entries)-1]
if last_entry.Content.CursorType != "Bottom" {
panic("No bottom cursor found")
}
@ -349,7 +351,7 @@ func (api_response APIV2Response) IsEmpty() bool {
*/
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove()
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
if !strings.HasPrefix(entry.EntryID, "tweet-") {
continue
}
@ -363,12 +365,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
return ret, nil
}
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
if cursor != "" {
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
}
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on
}
/**
@ -446,7 +447,7 @@ func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Respo
}
if fresh_response.IsEmpty() {
// Response has a pinned tweet, but no other content: end of feed has been reached
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
}
last_response = &fresh_response

View File

@ -1,10 +1,10 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"fmt"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -34,7 +34,7 @@ func TestAPIV2ParseUser(t *testing.T) {
assert.Equal(user.ID, UserID(44067298))
assert.Equal(user.DisplayName, "Michael Malice")
assert.Equal(user.Handle, UserHandle("michaelmalice"))
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & " +
assert.Equal(user.Bio, "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & "+
"Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model")
assert.Equal(user.FollowingCount, 964)
assert.Equal(user.FollowersCount, 334571)
@ -70,7 +70,7 @@ func TestAPIV2ParseTweet(t *testing.T) {
assert.True(ok)
assert.Equal(tweet.ID, TweetID(1485708879174508550))
assert.Equal(tweet.UserID, UserID(44067298))
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a " +
assert.Equal(tweet.Text, "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a "+
"row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964")
assert.Equal(tweet.PostedAt.Unix(), int64(1643055574))
assert.Equal(tweet.QuotedTweetID, TweetID(0))
@ -133,7 +133,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
assert.True(ok)
assert.Equal(TweetID(1485690410899021826), quote_tweet.ID)
assert.Equal(TweetID(1485690069079846915), quote_tweet.QuotedTweetID)
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didn’t stop Trump's election, "+
assert.Equal("Hatred is powerless in and of itself despite all the agitprop to the contrary\nHatred didnt stop Trump's election, "+
"for example", quote_tweet.Text)
// Should be 2 users: quoter and quoted
@ -182,7 +182,7 @@ func TestAPIV2ParseRetweet(t *testing.T) {
// Check the video
v := tweet.Videos[0]
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1485627274594590721/pu/img/O6mMKrsqWl8WcMy1.jpg", v.ThumbnailRemoteUrl)
assert.Equal(0, v.ViewCount) // TODO: make this work
assert.Equal(0, v.ViewCount) // TODO: make this work
assert.Equal(720, v.Height)
assert.Equal(720, v.Width)
assert.Equal(30066, v.Duration)
@ -200,7 +200,6 @@ func TestAPIV2ParseRetweet(t *testing.T) {
assert.Equal(UserID(44067298), retweeting_user.ID)
assert.Equal(UserHandle("michaelmalice"), retweeting_user.Handle)
// Should be 1 retweet
assert.Equal(1, len(trove.Retweets))
retweet, ok := trove.Retweets[1485699748514476037]
@ -270,7 +269,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
assert.Equal(UserID(599817378), retweet.RetweetedByID)
}
/**
* Parse tweet with quoted tombstone
*/
@ -300,13 +298,12 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
assert.True(ok)
assert.Equal(TweetID(1485774025347371008), tombstoned_tweet.ID)
assert.Equal("no longer exists", tombstoned_tweet.TombstoneType)
assert.True (tombstoned_tweet.IsStub)
assert.True(tombstoned_tweet.IsStub)
assert.Equal(UserHandle("coltnkat"), tombstoned_tweet.UserHandle)
assert.Equal(0, len(trove.Retweets))
}
/**
* Parse a tweet with a link
*/
@ -326,7 +323,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
assert.Equal(1, len(trove.Tweets))
tweet, ok := trove.Tweets[1485695695025803264]
assert.True(ok)
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to " +
assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to "+
"show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)
assert.Equal(1, len(tweet.Urls))
@ -335,7 +332,7 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
assert.Equal("observer.com", url.Domain)
assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. Its a way to demonstrate to " +
assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. Its a way to demonstrate to "+
"others that one is a good person without having to do anything", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal(600, url.ThumbnailWidth)
@ -439,10 +436,9 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
assert.Equal(int64(1643137976), poll.VotingEndsAt.Unix())
assert.Equal(int64(1643055638), poll.LastUpdatedAt.Unix())
assert.Equal(1440 * 60, poll.VotingDuration)
assert.Equal(1440*60, poll.VotingDuration)
}
func TestParseAPIV2UserFeed(t *testing.T) {
data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
if err != nil {
@ -495,7 +491,6 @@ func TestParseAPIV2UserFeed(t *testing.T) {
fmt.Printf("%d Users, %d Tweets, %d Retweets\n", len(tweet_trove.Users), len(tweet_trove.Tweets), len(tweet_trove.Retweets))
}
/**
* Should correctly identify an "empty" response
*/
@ -562,13 +557,12 @@ func TestAPIV2TombstoneEntry(t *testing.T) {
err = json.Unmarshal(data, &tweet_result)
require.NoError(t, err)
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
assert.Len(trove.Tweets, 0)
assert.Len(trove.Users, 0)
assert.Len(trove.Retweets, 0)
}
func TestTweetWithWarning(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/api_v2/tweet_with_warning.json")

View File

@ -1,29 +1,29 @@
package scraper
import (
"path"
"path"
)
type ImageID int64
type Image struct {
ID ImageID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
IsDownloaded bool
ID ImageID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
IsDownloaded bool
}
func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := path.Base(apiMedia.MediaURLHttps)
return Image{
ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename,
IsDownloaded: false,
}
local_filename := path.Base(apiMedia.MediaURLHttps)
return Image{
ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename,
IsDownloaded: false,
}
}

View File

@ -1,31 +1,31 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
. "offline_twitter/scraper"
)
func TestParseAPIMedia(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/image.json")
if err != nil {
panic(err)
}
var apimedia APIMedia
err = json.Unmarshal(data, &apimedia)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/image.json")
if err != nil {
panic(err)
}
var apimedia APIMedia
err = json.Unmarshal(data, &apimedia)
require.NoError(t, err)
image := ParseAPIMedia(apimedia)
assert.Equal(ImageID(1395882862289772553), image.ID)
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
assert.Equal(593, image.Width)
assert.Equal(239, image.Height)
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
assert.False(image.IsDownloaded)
image := ParseAPIMedia(apimedia)
assert.Equal(ImageID(1395882862289772553), image.ID)
assert.Equal("https://pbs.twimg.com/media/E18sEUrWYAk8dBl.jpg", image.RemoteURL)
assert.Equal(593, image.Width)
assert.Equal(239, image.Height)
assert.Equal("E18sEUrWYAk8dBl.jpg", image.LocalFilename)
assert.False(image.IsDownloaded)
}

View File

@ -2,11 +2,10 @@ package scraper
import (
"fmt"
"time"
"net/http"
"time"
)
/**
* Return the expanded version of a short URL. Input must be a real short URL.
*/
@ -21,7 +20,7 @@ func ExpandShortUrl(short_url string) string {
resp, err := client.Get(short_url)
if err != nil {
panic(err) // TODO: handle timeouts
panic(err) // TODO: handle timeouts
}
if resp.StatusCode != 301 {
panic(fmt.Errorf("Unknown status code returned when expanding short url %q: %s\n %w", short_url, resp.Status, EXTERNAL_API_ERROR))

View File

@ -6,12 +6,11 @@ import (
"net/http"
"net/http/httptest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/assert"
. "offline_twitter/scraper"
)
func TestExpandShortUrl(t *testing.T) {
redirecting_to := "redirect target"
srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {

View File

@ -1,82 +1,82 @@
package scraper
import (
"strings"
"strconv"
"net/url"
"net/url"
"strconv"
"strings"
)
type PollID int64
type Poll struct {
ID PollID
TweetID TweetID
NumChoices int
ID PollID
TweetID TweetID
NumChoices int
Choice1 string
Choice1_Votes int
Choice2 string
Choice2_Votes int
Choice3 string
Choice3_Votes int
Choice4 string
Choice4_Votes int
Choice1 string
Choice1_Votes int
Choice2 string
Choice2_Votes int
Choice3 string
Choice3_Votes int
Choice4 string
Choice4_Votes int
VotingDuration int // In seconds
VotingEndsAt Timestamp
VotingDuration int // In seconds
VotingEndsAt Timestamp
LastUpdatedAt Timestamp `db:"last_scraped_at"`
LastUpdatedAt Timestamp `db:"last_scraped_at"`
}
func ParseAPIPoll(apiCard APICard) Poll {
card_url, err := url.Parse(apiCard.ShortenedUrl)
if err != nil {
panic(err)
}
id := int_or_panic(card_url.Hostname())
card_url, err := url.Parse(apiCard.ShortenedUrl)
if err != nil {
panic(err)
}
id := int_or_panic(card_url.Hostname())
ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}
ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
if ret.NumChoices > 2 {
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
}
if ret.NumChoices > 3 {
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
}
if ret.NumChoices > 2 {
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
}
if ret.NumChoices > 3 {
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
}
return ret
return ret
}
func parse_num_choices(card_name string) int {
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
panic("Not valid card name: " + card_name)
}
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
panic("Not valid card name: " + card_name)
}
return int_or_panic(card_name[4:5])
return int_or_panic(card_name[4:5])
}
func int_or_panic(s string) int {
result, err := strconv.Atoi(s)
if err != nil {
panic(err)
}
return result
result, err := strconv.Atoi(s)
if err != nil {
panic(err)
}
return result
}

View File

@ -1,67 +1,67 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
. "offline_twitter/scraper"
)
func TestParsePoll2Choices(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1457419248461131776), poll.ID)
assert.Equal(2, poll.NumChoices)
assert.Equal(60 * 60 * 24, poll.VotingDuration)
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1457419248461131776), poll.ID)
assert.Equal(2, poll.NumChoices)
assert.Equal(60*60*24, poll.VotingDuration)
assert.Equal(int64(1636397201), poll.VotingEndsAt.Unix())
assert.Equal(int64(1636318755), poll.LastUpdatedAt.Unix())
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Yes", poll.Choice1)
assert.Equal("No", poll.Choice2)
assert.Equal(529, poll.Choice1_Votes)
assert.Equal(2182, poll.Choice2_Votes)
assert.Less(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Yes", poll.Choice1)
assert.Equal("No", poll.Choice2)
assert.Equal(529, poll.Choice1_Votes)
assert.Equal(2182, poll.Choice2_Votes)
}
func TestParsePoll4Choices(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1455611588854140929), poll.ID)
assert.Equal(4, poll.NumChoices)
assert.Equal(60 * 60 * 24, poll.VotingDuration)
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
poll := ParseAPIPoll(apiCard)
assert.Equal(PollID(1455611588854140929), poll.ID)
assert.Equal(4, poll.NumChoices)
assert.Equal(60*60*24, poll.VotingDuration)
assert.Equal(int64(1635966221), poll.VotingEndsAt.Unix())
assert.Equal(int64(1635966226), poll.LastUpdatedAt.Unix())
assert.Greater(poll.LastUpdatedAt.Unix(), poll.VotingEndsAt.Unix())
assert.Equal("Alec Baldwin", poll.Choice1)
assert.Equal(1669, poll.Choice1_Votes)
assert.Equal("Alec Baldwin", poll.Choice1)
assert.Equal(1669, poll.Choice1_Votes)
assert.Equal("Andew Cuomo", poll.Choice2)
assert.Equal(272, poll.Choice2_Votes)
assert.Equal("Andew Cuomo", poll.Choice2)
assert.Equal(272, poll.Choice2_Votes)
assert.Equal("George Floyd", poll.Choice3)
assert.Equal(829, poll.Choice3_Votes)
assert.Equal("George Floyd", poll.Choice3)
assert.Equal(829, poll.Choice3_Votes)
assert.Equal("Derek Chauvin", poll.Choice4)
assert.Equal(2397, poll.Choice4_Votes)
assert.Equal("Derek Chauvin", poll.Choice4)
assert.Equal(2397, poll.Choice4_Votes)
}

View File

@ -1,12 +1,12 @@
package scraper
type Retweet struct {
RetweetID TweetID
TweetID TweetID
Tweet *Tweet
RetweetedByID UserID `db:"retweeted_by"`
RetweetedBy *User
RetweetedAt Timestamp
RetweetID TweetID
TweetID TweetID
Tweet *Tweet
RetweetedByID UserID `db:"retweeted_by"`
RetweetedBy *User
RetweetedAt Timestamp
}
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {

View File

@ -5,8 +5,8 @@ import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)

View File

@ -5,7 +5,7 @@ import (
)
func TimestampToDateString(timestamp int) string {
panic("???") // TODO
panic("???") // TODO
}
/**

View File

@ -1,9 +1,9 @@
package scraper
import (
"time"
"fmt"
"strings"
"time"
"offline_twitter/terminal_utils"
)
@ -13,18 +13,18 @@ const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50
type TweetID int64
type Tweet struct {
ID TweetID
UserID UserID
UserHandle UserHandle // For processing tombstones
User *User
Text string
PostedAt Timestamp
NumLikes int
NumRetweets int
NumReplies int
NumQuoteTweets int
InReplyToID TweetID
QuotedTweetID TweetID
ID TweetID
UserID UserID
UserHandle UserHandle // For processing tombstones
User *User
Text string
PostedAt Timestamp
NumLikes int
NumRetweets int
NumReplies int
NumQuoteTweets int
InReplyToID TweetID
QuotedTweetID TweetID
Images []Image
Videos []Video
@ -35,14 +35,13 @@ type Tweet struct {
Polls []Poll
TombstoneType string
IsStub bool
IsStub bool
IsContentDownloaded bool
IsContentDownloaded bool
IsConversationScraped bool
LastScrapedAt Timestamp
LastScrapedAt Timestamp
}
func (t Tweet) String() string {
var author string
if t.User != nil {
@ -52,7 +51,7 @@ func (t Tweet) String() string {
}
ret := fmt.Sprintf(
`%s
`%s
%s
%s
Replies: %d RT: %d QT: %d Likes: %d
@ -67,11 +66,11 @@ Replies: %d RT: %d QT: %d Likes: %d
)
if len(t.Images) > 0 {
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN + "images: %d\n" + terminal_utils.COLOR_RESET, len(t.Images))
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
}
if len(t.Urls) > 0 {
ret += "urls: [\n"
for _, url := range(t.Urls) {
for _, url := range t.Urls {
ret += " " + url.Text + "\n"
}
ret += "]"
@ -90,7 +89,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.Text = apiTweet.FullText
// Process "posted-at" date and time
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil {
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
@ -125,7 +124,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process images
for _, media := range apiTweet.Entities.Media {
if media.Type != "photo" { // TODO: remove this eventually
if media.Type != "photo" { // TODO: remove this eventually
panic(fmt.Errorf("Unknown media type %q:\n %w", media.Type, EXTERNAL_API_ERROR))
}
new_image := ParseAPIMedia(media)
@ -151,7 +150,6 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
}
}
// Process videos
for _, entity := range apiTweet.ExtendedEntities.Media {
if entity.Type != "video" && entity.Type != "animated_gif" {
@ -175,13 +173,12 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process tombstones and other metadata
ret.TombstoneType = apiTweet.TombstoneText
ret.IsStub = !(ret.TombstoneType == "")
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
return
}
/**
* Get a single tweet with no replies from the API.
*
@ -206,7 +203,6 @@ func GetTweet(id TweetID) (Tweet, error) {
return ParseSingleTweet(single_tweet)
}
/**
* Return a list of tweets, including the original and the rest of its thread,
* along with a list of associated users.
@ -227,7 +223,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
return
}
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
tweet_response.GetCursor() != "" {
tweet_response.GetCursor() != "" {
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
if err != nil {
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)

View File

@ -5,13 +5,13 @@ import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
func load_tweet_from_file(filename string) Tweet{
func load_tweet_from_file(filename string) Tweet {
data, err := os.ReadFile(filename)
if err != nil {
panic(err)
@ -28,12 +28,11 @@ func load_tweet_from_file(filename string) Tweet{
return tweet
}
func TestParseSingleTweet(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_unicode_chars.json")
assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the " +
assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the "+
"largest white pill Ive swallowed in years.", tweet.Text)
assert.Len(tweet.Mentions, 1)
assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
@ -73,7 +72,7 @@ func TestParseTweetWithQuotedTweetAndLink(t *testing.T) {
assert := assert.New(t)
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json")
assert.Equal("This is video hes talking about. Please watch. Is there a single US politician capable of doing this with the " +
assert.Equal("This is video hes talking about. Please watch. Is there a single US politician capable of doing this with the "+
"weasels and rats running American industry today?", tweet.Text)
assert.Equal(TweetID(1497997890999898115), tweet.QuotedTweetID)
@ -135,7 +134,7 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
assert.False(tweet.Urls[0].HasCard)
assert.False(tweet.Urls[1].HasCard)
assert.True (tweet.Urls[2].HasCard)
assert.True(tweet.Urls[2].HasCard)
assert.Equal("Bidens victory came from the suburbs", tweet.Urls[2].Title)
}
@ -166,12 +165,11 @@ func TestTweetWithPoll(t *testing.T) {
assert.Equal(624, p.Choice2_Votes)
assert.Equal(778, p.Choice3_Votes)
assert.Equal(1138, p.Choice4_Votes)
assert.Equal(1440 * 60, p.VotingDuration)
assert.Equal(1440*60, p.VotingDuration)
assert.Equal(int64(1638331934), p.VotingEndsAt.Unix())
assert.Equal(int64(1638331935), p.LastUpdatedAt.Unix())
}
func TestParseTweetResponse(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/michael_malice_feed.json")
@ -186,7 +184,7 @@ func TestParseTweetResponse(t *testing.T) {
require.NoError(t, err)
tweets, retweets, users := trove.Transform()
assert.Len(tweets, 29 - 3)
assert.Len(tweets, 29-3)
assert.Len(retweets, 3)
assert.Len(users, 9)
}

View File

@ -8,9 +8,9 @@ import (
)
type TweetTrove struct {
Tweets map[TweetID]Tweet
Users map[UserID]User
Retweets map[TweetID]Retweet
Tweets map[TweetID]Tweet
Users map[UserID]User
Retweets map[TweetID]Retweet
TombstoneUsers []UserHandle
}
@ -38,7 +38,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [
retweets = append(retweets, val)
}
return
} // TODO: refactor until this function isn't needed anymore
} // TODO: refactor until this function isn't needed anymore
/**
* Search for a user by handle. Second param is whether the user was found or not.

View File

@ -2,28 +2,28 @@ package scraper
import (
"fmt"
"net/url"
"path"
"regexp"
"net/url"
)
type Url struct {
TweetID TweetID
Domain string
Text string
ShortText string
Title string
Description string
ThumbnailWidth int
ThumbnailHeight int
Domain string
Text string
ShortText string
Title string
Description string
ThumbnailWidth int
ThumbnailHeight int
ThumbnailRemoteUrl string
ThumbnailLocalPath string
CreatorID UserID
SiteID UserID
CreatorID UserID
SiteID UserID
HasCard bool
HasThumbnail bool
HasCard bool
HasThumbnail bool
IsContentDownloaded bool
}
@ -86,7 +86,7 @@ func TryParseTweetUrl(url string) (UserHandle, TweetID, bool) {
if matches == nil {
return UserHandle(""), TweetID(0), false
}
if len(matches) != 3 { // matches[0] is the full string
if len(matches) != 3 { // matches[0] is the full string
panic(matches)
}
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true

View File

@ -1,153 +1,153 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
. "offline_twitter/scraper"
)
func TestParseAPIUrlCard(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
url := ParseAPIUrlCard(apiCard)
assert.Equal("reason.com", url.Domain)
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned " +
"resilience.\"", url.Description)
assert.Equal(600, url.ThumbnailWidth)
assert.Equal(315, url.ThumbnailHeight)
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(155581583), url.CreatorID)
assert.Equal(UserID(16467567), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
url := ParseAPIUrlCard(apiCard)
assert.Equal("reason.com", url.Domain)
assert.Equal("L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'", url.Title)
assert.Equal("\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned "+
"resilience.\"", url.Description)
assert.Equal(600, url.ThumbnailWidth)
assert.Equal(315, url.ThumbnailHeight)
assert.Equal("https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
assert.Equal("odDi9EqO_600x600.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(155581583), url.CreatorID)
assert.Equal(UserID(16467567), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8" +
"Watch this episode on Rumble: https://rumble...", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("The Politically Incorrect Guide to the Constitution (Starring Tom...", url.Title)
assert.Equal("Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8"+
"Watch this episode on Rumble: https://rumble...", url.Description)
assert.Equal("https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
assert.Equal("_1t0btyt_800x320_1.jpg", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________" +
"__________________________________________...", url.Description)
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
url := ParseAPIUrlCard(apiCard)
assert.Equal("www.youtube.com", url.Domain)
assert.Equal("Did Michael Malice Turn Me into an Anarchist? | Ep 181", url.Title)
assert.Equal("SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily____________"+
"__________________________________________...", url.Description)
assert.Equal("https://pbs.twimg.com/cards/player-placeholder.png", url.ThumbnailRemoteUrl)
assert.Equal("player-placeholder.png", url.ThumbnailLocalPath)
assert.Equal(UserID(10228272), url.SiteID)
assert.True(url.HasThumbnail)
assert.False(url.IsContentDownloaded)
}
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)
url := ParseAPIUrlCard(apiCard)
assert.Equal("en.m.wikipedia.org", url.Domain)
assert.Equal("Entryism - Wikipedia", url.Title)
assert.Equal("", url.Description)
assert.True(url.HasCard)
assert.False(url.HasThumbnail)
url := ParseAPIUrlCard(apiCard)
assert.Equal("en.m.wikipedia.org", url.Domain)
assert.Equal("Entryism - Wikipedia", url.Title)
assert.Equal("", url.Description)
assert.True(url.HasCard)
assert.False(url.HasThumbnail)
}
/**
* Should check if a url is a tweet url, and if so, parse it
*/
func TestParseTweetUrl(t *testing.T) {
assert:= assert.New(t)
assert := assert.New(t)
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
handle, id, is_ok := TryParseTweetUrl(url)
assert.True(is_ok)
assert.Equal(UserHandle("kanesays23"), handle)
assert.Equal(TweetID(1429583672827465730), id)
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
handle, id, is_ok := TryParseTweetUrl(url)
assert.True(is_ok)
assert.Equal(UserHandle("kanesays23"), handle)
assert.Equal(TweetID(1429583672827465730), id)
// Test url with GET params
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.True(is_ok)
assert.Equal(UserHandle("NerdNoticing"), handle)
assert.Equal(TweetID(1263192389050654720), id)
// Test url with GET params
handle, id, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.True(is_ok)
assert.Equal(UserHandle("NerdNoticing"), handle)
assert.Equal(TweetID(1263192389050654720), id)
// Test invalid url
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.False(is_ok)
// Test invalid url
_, _, is_ok = TryParseTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.False(is_ok)
// Test empty string
_, _, is_ok = TryParseTweetUrl("")
assert.False(is_ok)
// Test empty string
_, _, is_ok = TryParseTweetUrl("")
assert.False(is_ok)
}
/**
* Should extract a user handle from a tweet URL, or fail if URL is invalid
*/
func TestParseHandleFromTweetUrl(t *testing.T) {
assert := assert.New(t)
assert := assert.New(t)
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
result, err := ParseHandleFromTweetUrl(url)
assert.NoError(err)
assert.Equal(UserHandle("kanesays23"), result)
// Test valid tweet url
url := "https://twitter.com/kanesays23/status/1429583672827465730"
result, err := ParseHandleFromTweetUrl(url)
assert.NoError(err)
assert.Equal(UserHandle("kanesays23"), result)
// Test url with GET params
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.NoError(err)
assert.Equal(UserHandle("NerdNoticing"), result)
// Test url with GET params
result, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
assert.NoError(err)
assert.Equal(UserHandle("NerdNoticing"), result)
// Test invalid url
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.Error(err)
// Test invalid url
_, err = ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
assert.Error(err)
// Test empty string
_, err = ParseHandleFromTweetUrl("")
assert.Error(err)
// Test empty string
_, err = ParseHandleFromTweetUrl("")
assert.Error(err)
}

View File

@ -1,12 +1,12 @@
package scraper
import (
"fmt"
"strings"
"regexp"
"path"
"fmt"
"path"
"regexp"
"strings"
"offline_twitter/terminal_utils"
"offline_twitter/terminal_utils"
)
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
@ -15,47 +15,48 @@ type UserID int64
type UserHandle string
func JoinArrayOfHandles(handles []UserHandle) string {
ret := []string{}
for _, h := range handles {
ret = append(ret, string(h))
}
return strings.Join(ret, ",")
ret := []string{}
for _, h := range handles {
ret = append(ret, string(h))
}
return strings.Join(ret, ",")
}
type User struct {
ID UserID
DisplayName string
Handle UserHandle
Bio string
FollowingCount int
FollowersCount int
Location string
Website string
JoinDate Timestamp
IsPrivate bool
IsVerified bool
IsBanned bool
ProfileImageUrl string
ProfileImageLocalPath string
BannerImageUrl string
BannerImageLocalPath string
ID UserID
DisplayName string
Handle UserHandle
Bio string
FollowingCount int
FollowersCount int
Location string
Website string
JoinDate Timestamp
IsPrivate bool
IsVerified bool
IsBanned bool
IsDeleted bool
ProfileImageUrl string
ProfileImageLocalPath string
BannerImageUrl string
BannerImageLocalPath string
PinnedTweetID TweetID
PinnedTweet *Tweet
PinnedTweetID TweetID
PinnedTweet *Tweet
IsFollowed bool
IsContentDownloaded bool
IsNeedingFakeID bool
IsIdFake bool
IsFollowed bool
IsContentDownloaded bool
IsNeedingFakeID bool
IsIdFake bool
}
func (u User) String() string {
var verified string
if u.IsVerified {
verified = "[\u2713]"
}
ret := fmt.Sprintf(
`%s%s
var verified string
if u.IsVerified {
verified = "[\u2713]"
}
ret := fmt.Sprintf(
`%s%s
@%s
%s
@ -65,115 +66,112 @@ Joined %s
%s
%s
`,
u.DisplayName,
verified,
u.Handle,
terminal_utils.WrapText(u.Bio, 60),
u.FollowingCount,
u.FollowersCount,
terminal_utils.FormatDate(u.JoinDate.Time),
u.Location,
u.Website,
)
if u.PinnedTweet != nil {
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
} else {
println("Pinned tweet id:", u.PinnedTweetID)
}
return ret
u.DisplayName,
verified,
u.Handle,
terminal_utils.WrapText(u.Bio, 60),
u.FollowingCount,
u.FollowersCount,
terminal_utils.FormatDate(u.JoinDate.Time),
u.Location,
u.Website,
)
if u.PinnedTweet != nil {
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
} else {
println("Pinned tweet id:", u.PinnedTweetID)
}
return ret
}
/**
* Unknown Users with handles are only created by direct GetUser calls (either `twitter fetch_user`
* subcommand or as part of tombstone user fetching.)
*/
func GetUnknownUserWithHandle(handle UserHandle) User {
return User{
ID: UserID(0), // 2^62 + 1...
DisplayName: string(handle),
Handle: handle,
Bio: "<blank>",
FollowersCount: 0,
FollowingCount: 0,
Location: "<blank>",
Website:"<blank>",
JoinDate: TimestampFromUnix(0),
IsVerified: false,
IsPrivate: false,
IsNeedingFakeID: true,
IsIdFake: true,
}
return User{
ID: UserID(0), // 2^62 + 1...
DisplayName: string(handle),
Handle: handle,
Bio: "<blank>",
FollowersCount: 0,
FollowingCount: 0,
Location: "<blank>",
Website: "<blank>",
JoinDate: TimestampFromUnix(0),
IsVerified: false,
IsPrivate: false,
IsNeedingFakeID: true,
IsIdFake: true,
}
}
// Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
if apiUser.DoesntExist {
// User may have been deleted, or there was a typo. There's no data to parse
if apiUser.ScreenName == "" {
panic("ScreenName is empty!")
}
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
return
}
ret.ID = UserID(apiUser.ID)
ret.Handle = UserHandle(apiUser.ScreenName)
if apiUser.IsBanned {
// Banned users won't have any further info, so just return here
ret.IsBanned = true
return
}
ret.DisplayName = apiUser.Name
ret.Bio = apiUser.Description
ret.FollowingCount = apiUser.FriendsCount
ret.FollowersCount = apiUser.FollowersCount
ret.Location = apiUser.Location
if len(apiUser.Entities.URL.Urls) > 0 {
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
}
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
if err != nil {
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
return
}
ret.IsPrivate = apiUser.Protected
ret.IsVerified = apiUser.Verified
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
if apiUser.DoesntExist {
// User may have been deleted, or there was a typo. There's no data to parse
if apiUser.ScreenName == "" {
panic("ScreenName is empty!")
}
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
return
}
ret.ID = UserID(apiUser.ID)
ret.Handle = UserHandle(apiUser.ScreenName)
if apiUser.IsBanned {
// Banned users won't have any further info, so just return here
ret.IsBanned = true
return
}
ret.DisplayName = apiUser.Name
ret.Bio = apiUser.Description
ret.FollowingCount = apiUser.FriendsCount
ret.FollowersCount = apiUser.FollowersCount
ret.Location = apiUser.Location
if len(apiUser.Entities.URL.Urls) > 0 {
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
}
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
if err != nil {
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
return
}
ret.IsPrivate = apiUser.Protected
ret.IsVerified = apiUser.Verified
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
}
ret.BannerImageUrl = apiUser.ProfileBannerURL
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
}
ret.BannerImageUrl = apiUser.ProfileBannerURL
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
if len(apiUser.PinnedTweetIdsStr) > 0 {
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
}
return
if len(apiUser.PinnedTweetIdsStr) > 0 {
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
}
return
}
// Calls API#GetUser and returns the parsed result
func GetUser(handle UserHandle) (User, error) {
api := API{}
apiUser, err := api.GetUser(handle)
if apiUser.ScreenName == "" {
apiUser.ScreenName = string(handle)
}
if err != nil {
return User{}, err
}
return ParseSingleUser(apiUser)
api := API{}
apiUser, err := api.GetUser(handle)
if apiUser.ScreenName == "" {
apiUser.ScreenName = string(handle)
}
if err != nil {
return User{}, err
}
return ParseSingleUser(apiUser)
}
/**
* Make a filename for the profile image, that hopefully won't clobber other ones
*/
func (u User) compute_profile_image_local_path() string {
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
return string(u.Handle) + "_profile_" + path.Base(u.ProfileImageUrl)
}
/**
@ -182,34 +180,34 @@ func (u User) compute_profile_image_local_path() string {
* If there is no banner image, just return nothing.
*/
func (u User) compute_banner_image_local_path() string {
if u.BannerImageUrl == "" {
return ""
}
base_name := path.Base(u.BannerImageUrl)
if u.BannerImageUrl == "" {
return ""
}
base_name := path.Base(u.BannerImageUrl)
// Check if it has an extension (e.g., ".png" or ".jpeg")
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
// If it doesn't have an extension, add one
base_name += ".jpg"
}
return string(u.Handle) + "_banner_" + base_name
// Check if it has an extension (e.g., ".png" or ".jpeg")
if !regexp.MustCompile(`\.\w{2,4}$`).MatchString(base_name) {
// If it doesn't have an extension, add one
base_name += ".jpg"
}
return string(u.Handle) + "_banner_" + base_name
}
/**
* Get the URL where we would expect to find a User's tiny profile image
*/
func (u User) GetTinyProfileImageUrl() string {
// If profile image is empty, then just use the default profile image
if u.ProfileImageUrl == "" {
return DEFAULT_PROFILE_IMAGE_URL
}
// If profile image is empty, then just use the default profile image
if u.ProfileImageUrl == "" {
return DEFAULT_PROFILE_IMAGE_URL
}
// Check that the format is as expected
r := regexp.MustCompile(`(\.\w{2,4})$`)
if !r.MatchString(u.ProfileImageUrl) {
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
}
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
// Check that the format is as expected
r := regexp.MustCompile(`(\.\w{2,4})$`)
if !r.MatchString(u.ProfileImageUrl) {
panic(fmt.Errorf("Weird profile image url (here is the file extension?): %s", u.ProfileImageUrl))
}
return r.ReplaceAllString(u.ProfileImageUrl, "_normal$1")
}
/**
@ -217,8 +215,8 @@ func (u User) GetTinyProfileImageUrl() string {
* If user has a blank or default profile image, return a non-personalized default path.
*/
func (u User) GetTinyProfileImageLocalPath() string {
if u.ProfileImageUrl == "" {
return path.Base(u.GetTinyProfileImageUrl())
}
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
if u.ProfileImageUrl == "" {
return path.Base(u.GetTinyProfileImageUrl())
}
return string(u.Handle) + "_profile_" + path.Base(u.GetTinyProfileImageUrl())
}

View File

@ -1,8 +1,8 @@
package scraper
import (
"fmt"
"errors"
"fmt"
)
/**
@ -33,7 +33,6 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
return ParseTweetResponse(tweet_response)
}
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
api := API{}
api_response, err := api.GetGraphqlFeedFor(user_id, "")

View File

@ -1,14 +1,14 @@
package scraper_test
import (
"testing"
"encoding/json"
"os"
"net/http"
"os"
"testing"
"github.com/jarcoal/httpmock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
)
@ -31,7 +31,7 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal(UserID(44067298), user.ID)
assert.Equal("Michael Malice", user.DisplayName)
assert.Equal(UserHandle("michaelmalice"), user.Handle)
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by " +
assert.Equal("Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by "+
"Harvey Pekar\nUnderwear Model\nHe/Him ⚑", user.Bio)
assert.Equal(941, user.FollowingCount)
assert.Equal(208589, user.FollowersCount)
@ -39,7 +39,7 @@ func TestParseSingleUser(t *testing.T) {
assert.Equal("https://amzn.to/3oInafv", user.Website)
assert.Equal(int64(1243920952), user.JoinDate.Unix())
assert.False(user.IsPrivate)
assert.True (user.IsVerified)
assert.True(user.IsVerified)
assert.False(user.IsBanned)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9.jpg", user.ProfileImageUrl)
assert.Equal("https://pbs.twimg.com/profile_images/1064051934812913664/Lbwdb_C9_normal.jpg", user.GetTinyProfileImageUrl())
@ -90,7 +90,7 @@ func TestParseDeletedUser(t *testing.T) {
handle := "Some Random Deleted User"
apiUser := user_resp.ConvertToAPIUser()
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
apiUser.ScreenName = string(handle) // This is done in scraper.GetUser, since users are retrieved by handle anyway
user, err := ParseSingleUser(apiUser)
require.NoError(t, err)

View File

@ -1,9 +1,9 @@
package scraper
import (
"fmt"
"sort"
"path"
"fmt"
"path"
"sort"
)
type VideoID int64
@ -12,61 +12,61 @@ type VideoID int64
// from someone else).
type Video struct {
ID VideoID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
ID VideoID
TweetID TweetID
Width int
Height int
RemoteURL string
LocalFilename string
ThumbnailRemoteUrl string
ThumbnailLocalPath string `db:"thumbnail_local_filename"`
Duration int // milliseconds
ViewCount int
ThumbnailRemoteUrl string
ThumbnailLocalPath string `db:"thumbnail_local_filename"`
Duration int // milliseconds
ViewCount int
IsDownloaded bool
IsGif bool
IsDownloaded bool
IsGif bool
}
func ParseAPIVideo(apiVideo APIExtendedMedia, tweet_id TweetID) Video {
variants := apiVideo.VideoInfo.Variants
sort.Sort(variants)
variants := apiVideo.VideoInfo.Variants
sort.Sort(variants)
var view_count int
var view_count int
r := apiVideo.Ext.MediaStats.R
r := apiVideo.Ext.MediaStats.R
switch r.(type) {
case string:
view_count = 0
case map[string]interface{}:
OK_entry, ok := r.(map[string]interface{})["ok"]
if !ok {
panic("No 'ok' value found in the R!")
}
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
view_count = int_or_panic(view_count_str.(string))
if !ok {
panic("No 'viewCount' value found in the OK!")
}
}
switch r.(type) {
case string:
view_count = 0
case map[string]interface{}:
OK_entry, ok := r.(map[string]interface{})["ok"]
if !ok {
panic("No 'ok' value found in the R!")
}
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
view_count = int_or_panic(view_count_str.(string))
if !ok {
panic("No 'viewCount' value found in the OK!")
}
}
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
local_filename := fmt.Sprintf("%d.mp4", tweet_id)
return Video{
ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
return Video{
ID: VideoID(apiVideo.ID),
TweetID: tweet_id,
Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height,
RemoteURL: variants[0].URL,
LocalFilename: local_filename,
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
Duration: apiVideo.VideoInfo.Duration,
ViewCount: view_count,
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: path.Base(apiVideo.MediaURLHttps),
Duration: apiVideo.VideoInfo.Duration,
ViewCount: view_count,
IsDownloaded: false,
IsGif: apiVideo.Type == "animated_gif",
}
IsDownloaded: false,
IsGif: apiVideo.Type == "animated_gif",
}
}

View File

@ -1,37 +1,37 @@
package scraper_test
import (
"testing"
"os"
"encoding/json"
"encoding/json"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper"
. "offline_twitter/scraper"
)
func TestParseAPIVideo(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video.json")
if err != nil {
panic(err)
}
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(t, err)
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/video.json")
if err != nil {
panic(err)
}
var apivideo APIExtendedMedia
err = json.Unmarshal(data, &apivideo)
require.NoError(t, err)
tweet_id := TweetID(28)
video := ParseAPIVideo(apivideo, tweet_id)
assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(tweet_id, video.TweetID)
assert.Equal(1280, video.Height)
assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("28.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded)
tweet_id := TweetID(28)
video := ParseAPIVideo(apivideo, tweet_id)
assert.Equal(VideoID(1418951950020845568), video.ID)
assert.Equal(tweet_id, video.TweetID)
assert.Equal(1280, video.Height)
assert.Equal(720, video.Width)
assert.Equal("https://video.twimg.com/ext_tw_video/1418951950020845568/pu/vid/720x1280/sm4iL9_f8Lclh0aa.mp4?tag=12", video.RemoteURL)
assert.Equal("28.mp4", video.LocalFilename)
assert.Equal("https://pbs.twimg.com/ext_tw_video_thumb/1418951950020845568/pu/img/eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailRemoteUrl)
assert.Equal("eUTaYYfuAJ8FyjUi.jpg", video.ThumbnailLocalPath)
assert.Equal(275952, video.ViewCount)
assert.Equal(88300, video.Duration)
assert.False(video.IsDownloaded)
}