From c09b77f4978bcacc49ded67d98a2b998ceb4e043 Mon Sep 17 00:00:00 2001 From: Alessio Date: Fri, 17 Feb 2023 13:07:12 -0500 Subject: [PATCH] Add ability to use saved sessions --- cmd/tests.sh | 9 +++++++++ cmd/twitter/help_message.txt | 20 +++++++++++++++++++- cmd/twitter/main.go | 33 +++++++++++++++++++++++++-------- go.mod | 2 +- go.sum | 6 ++++-- scraper/api_request_utils.go | 12 ++++++++++++ scraper/search.go | 5 ++--- scraper/space.go | 3 +-- scraper/tweet.go | 8 +++----- scraper/user.go | 3 +-- scraper/user_feed.go | 10 ++++------ 11 files changed, 81 insertions(+), 30 deletions(-) diff --git a/cmd/tests.sh b/cmd/tests.sh index 9aa8327..3f9cebe 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -300,12 +300,21 @@ tw unfollow cernovich test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" = "0" # Testing login +# TODO authentication: use an environment var for the password tw login offline_twatter S1pKIW#eRT016iA@OFcK test -f Offline_Twatter.session test "$(jq .UserHandle Offline_Twatter.session)" = "\"Offline_Twatter\"" test "$(jq .IsAuthenticated Offline_Twatter.session)" = "true" jq .CSRFToken Offline_Twatter.session | grep -P '"\w+"' +# When not logged in, age-restricted tweet should fail to fetch, so no row may exist for it +tw fetch_user PandasAndVidya +tw fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032 || true # This one is expected to fail +test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" = "0" + +# Fetch the same age-restricted tweet while logged in; this time it must be saved +tw --session Offline_Twatter fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032 +test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" = "1" # TODO: Maybe this file should be broken up into multiple test scripts diff --git a/cmd/twitter/help_message.txt b/cmd/twitter/help_message.txt index e03c7d1..f8a3f2d 100644 --- a/cmd/twitter/help_message.txt +++ b/cmd/twitter/help_message.txt 
@@ -1,4 +1,4 @@ -Usage: twitter [--profile <profile_dir>] [<flags>] <operation> <TARGET> +Usage: twitter [--profile <profile_dir>] [--session <session_name>] [<flags>] <operation> <TARGET> This application downloads tweets from twitter and saves them in a SQLite database. <profile_dir>: @@ -6,11 +6,29 @@ This application downloads tweets from twitter and saves them in a SQLite databa By default, will use the current working directory. Ignored if <operation> is "create_profile". +<session_name>: + Optional. Only needed when making authenticated requests. + If specified, the named session file (this value, appended with ".session" extension) will be used + when making API requests. + <operation>: create_profile <TARGET> is the directory to create. It must not exist already. <profile_dir> will be ignored if provided. + login + <TARGET> is the username or email to log in as. + An optional second target can be provided which will be used as the password; if not, a password + prompt (non-echoing) will be given so you can enter the password. + On success, the `login` operation will create (or overwrite) a ".session" file with the user + handle of the user it corresponds to, which contains session tokens and cookies allowing you to + make subsequent requests as that user. + For example, `twitter login offline_twatter` will prompt for a password, then attempt to log in as + @offline_twatter, and if successful, a file `Offline_Twatter.session` will be created. + The file name may be capitalized differently (offline_twatter => Offline_Twatter) if that's the canonical + capitalization that twitter uses, even though handles are case insensitive. + + fetch_user download_user_content <TARGET> is the user handle. 
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 33895ca..25fe047 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -3,9 +3,10 @@ package main import ( "flag" "fmt" - "os" - log "github.com/sirupsen/logrus" + "golang.org/x/term" + "os" + "syscall" "offline_twitter/persistence" "offline_twitter/scraper" @@ -28,6 +29,8 @@ func main() { show_version_flag := flag.Bool("version", false, "") flag.BoolVar(show_version_flag, "v", false, "") + session_name := flag.String("session", "", "Name of session file to use") + how_many := flag.Int("n", 50, "") flag.IntVar(how_many, "number", 50, "") @@ -86,21 +89,34 @@ func main() { } profile, err = persistence.LoadProfile(*profile_dir) - if err != nil { die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2) } - if len(args) == 3 && args[0] == "login" { - username := args[1] - password := args[2] - - login(username, password) + if *session_name != "" { + scraper.InitApi(profile.LoadSession(scraper.UserHandle(*session_name))) + // fmt.Printf("Operating as user: @%s\n", scraper.the_api.UserHandle) + } else { + scraper.InitApi(scraper.NewGuestSession()) } switch operation { case "create_profile": create_profile(target) + case "login": + var password string + if len(args) == 2 { + fmt.Printf("Password for @%s: ", target) + bytes_password, err := term.ReadPassword(int(syscall.Stdin)) + if err != nil { + panic(err) + } + fmt.Println() + password = string(bytes_password) + } else { + password = args[2] + } + login(target, password) case "fetch_user": fetch_user(scraper.UserHandle(target)) case "download_user_content": @@ -135,6 +151,7 @@ func main() { // - password: twitter account password func login(username string, password string) { + // Skip the scraper.the_api variable, just use a local one since no scraping is happening api := scraper.NewGuestSession() api.LogIn(username, password) diff --git a/go.mod b/go.mod index 29b0a0e..bbe33e0 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,6 @@ require ( 
github.com/mattn/go-sqlite3 v1.14.7 github.com/sirupsen/logrus v1.8.1 github.com/stretchr/testify v1.7.0 - golang.org/x/sys v0.3.0 // indirect + golang.org/x/term v0.5.0 gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index 36c103f..87a671a 100644 --- a/go.sum +++ b/go.sum @@ -23,8 +23,10 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= -golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/scraper/api_request_utils.go b/scraper/api_request_utils.go index 8db5438..c85ca70 100644 --- a/scraper/api_request_utils.go +++ b/scraper/api_request_utils.go @@ -24,6 +24,18 @@ type API struct { CSRFToken string } +// Use a global API variable since it is needed in so many utility functions (e.g., +// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having +// to inject it everywhere. +// +// Should be set by the caller (main program) depending on the session file used. 
+var the_api API + +// Initializer for the global api variable +func InitApi(newApi API) { + the_api = newApi +} + type api_outstruct struct { Cookies []*http.Cookie UserHandle UserHandle diff --git a/scraper/search.go b/scraper/search.go index ebf0c0f..e7c3880 100644 --- a/scraper/search.go +++ b/scraper/search.go @@ -18,14 +18,13 @@ func TimestampToDateString(timestamp int) string { * - videos */ func Search(query string, min_results int) (trove TweetTrove, err error) { - api := NewGuestSession() - tweet_response, err := api.Search(query, "") + tweet_response, err := the_api.Search(query, "") if err != nil { return } if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" { - err = api.GetMoreTweetsFromSearch(query, &tweet_response, min_results) + err = the_api.GetMoreTweetsFromSearch(query, &tweet_response, min_results) if errors.Is(err, END_OF_FEED) { println("End of feed!") } else if err != nil { diff --git a/scraper/space.go b/scraper/space.go index 00a4bb1..37859e0 100644 --- a/scraper/space.go +++ b/scraper/space.go @@ -38,8 +38,7 @@ func ParseAPISpace(apiCard APICard) Space { } func FetchSpaceDetail(id SpaceID) (TweetTrove, error) { - api := NewGuestSession() - space_response, err := api.GetSpace(id) + space_response, err := the_api.GetSpace(id) if err != nil { return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err) } diff --git a/scraper/tweet.go b/scraper/tweet.go index d62e01b..91bdc4e 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -212,8 +212,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { * returns: the single Tweet */ func GetTweet(id TweetID) (Tweet, error) { - api := NewGuestSession() - tweet_response, err := api.GetTweet(id, "") + tweet_response, err := the_api.GetTweet(id, "") if err != nil { return Tweet{}, fmt.Errorf("Error in API call:\n %w", err) } @@ -240,15 +239,14 @@ func GetTweet(id TweetID) (Tweet, error) { * returns: the tweet, list 
of its replies and context, and users associated with those replies */ func GetTweetFull(id TweetID) (trove TweetTrove, err error) { - api := NewGuestSession() - tweet_response, err := api.GetTweet(id, "") + tweet_response, err := the_api.GetTweet(id, "") if err != nil { err = fmt.Errorf("Error getting tweet: %d\n %w", id, err) return } if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD && tweet_response.GetCursor() != "" { - err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD) + err = the_api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD) if err != nil { err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) return diff --git a/scraper/user.go b/scraper/user.go index 3d4e1dd..0e340f2 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -156,8 +156,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) { // Calls API#GetUser and returns the parsed result func GetUser(handle UserHandle) (User, error) { - api := NewGuestSession() - apiUser, err := api.GetUser(handle) + apiUser, err := the_api.GetUser(handle) if apiUser.ScreenName == "" { apiUser.ScreenName = string(handle) } diff --git a/scraper/user_feed.go b/scraper/user_feed.go index e92880c..acf1c9c 100644 --- a/scraper/user_feed.go +++ b/scraper/user_feed.go @@ -16,15 +16,14 @@ import ( * returns: a slice of Tweets, Retweets, and Users */ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { - api := NewGuestSession() - tweet_response, err := api.GetFeedFor(user_id, "") + tweet_response, err := the_api.GetFeedFor(user_id, "") if err != nil { err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) return } if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" { - err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) + err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) if err != 
nil && !errors.Is(err, END_OF_FEED) { return } @@ -34,15 +33,14 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error } func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { - api := NewGuestSession() - api_response, err := api.GetGraphqlFeedFor(user_id, "") + api_response, err := the_api.GetGraphqlFeedFor(user_id, "") if err != nil { err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) return } if len(api_response.GetMainInstruction().Entries) < min_tweets && api_response.GetCursorBottom() != "" { - err = api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets) + err = the_api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets) if err != nil && !errors.Is(err, END_OF_FEED) { return }