Add ability to use saved sessions

This commit is contained in:
Alessio 2023-02-17 13:07:12 -05:00
parent 933ee9a7f9
commit c09b77f497
11 changed files with 81 additions and 30 deletions

View File

@ -300,12 +300,21 @@ tw unfollow cernovich
test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" = "0" test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" = "0"
# Testing login # Testing login
# TODO authentication: use an environment var for the password
tw login offline_twatter S1pKIW#eRT016iA@OFcK tw login offline_twatter S1pKIW#eRT016iA@OFcK
test -f Offline_Twatter.session test -f Offline_Twatter.session
test "$(jq .UserHandle Offline_Twatter.session)" = "\"Offline_Twatter\"" test "$(jq .UserHandle Offline_Twatter.session)" = "\"Offline_Twatter\""
test "$(jq .IsAuthenticated Offline_Twatter.session)" = "true" test "$(jq .IsAuthenticated Offline_Twatter.session)" = "true"
jq .CSRFToken Offline_Twatter.session | grep -P '"\w+"' jq .CSRFToken Offline_Twatter.session | grep -P '"\w+"'
# When not logged in, an age-restricted tweet should fail to fetch, so nothing should be saved.
# The tweet ID in the SQL checks must match the ID in the URL exactly, or the checks are vacuous.
tw fetch_user PandasAndVidya
tw fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032 || true # This one is expected to fail
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" = "0"
# Fetch the same age-restricted tweet while logged in; this time it should be saved
tw --session Offline_Twatter fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" = "1"
# TODO: Maybe this file should be broken up into multiple test scripts # TODO: Maybe this file should be broken up into multiple test scripts

View File

@ -1,4 +1,4 @@
Usage: twitter [--profile <profile_dir>] [<flags>] <operation> <TARGET> Usage: twitter [--profile <profile_dir>] [--session <session_name>] [<flags>] <operation> <TARGET>
This application downloads tweets from twitter and saves them in a SQLite database. This application downloads tweets from twitter and saves them in a SQLite database.
<profile_dir>: <profile_dir>:
@ -6,11 +6,29 @@ This application downloads tweets from twitter and saves them in a SQLite databa
By default, will use the current working directory. By default, will use the current working directory.
Ignored if <operation> is "create_profile". Ignored if <operation> is "create_profile".
<session_name>:
Optional. Only needed when making authenticated requests.
If specified, the named session file (this value with a ".session" extension appended) will be used
when making API requests.
<operation>: <operation>:
create_profile create_profile
<TARGET> is the directory to create. It must not exist already. <TARGET> is the directory to create. It must not exist already.
<profile_dir> will be ignored if provided. <profile_dir> will be ignored if provided.
login
<TARGET> is the username or email to log in as.
An optional second target can be provided which will be used as the password; if not, a password
prompt (non-echoing) will be given so you can enter the password.
On success, the `login` operation will create (or overwrite) a ".session" file named after the
handle of the user that logged in. It contains the session tokens and cookies that allow you to
make subsequent requests as that user.
For example, `twitter login offline_twatter` will prompt for a password, then attempt to log in as
@offline_twatter, and if successful, a file `Offline_Twatter.session` will be created.
The capitalization of the file name may differ from what you typed (offline_twatter =>
Offline_Twatter) if that's the canonical capitalization that twitter uses, since handles are
case insensitive.
fetch_user fetch_user
download_user_content download_user_content
<TARGET> is the user handle. <TARGET> is the user handle.

View File

@ -3,9 +3,10 @@ package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"os"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"golang.org/x/term"
"os"
"syscall"
"offline_twitter/persistence" "offline_twitter/persistence"
"offline_twitter/scraper" "offline_twitter/scraper"
@ -28,6 +29,8 @@ func main() {
show_version_flag := flag.Bool("version", false, "") show_version_flag := flag.Bool("version", false, "")
flag.BoolVar(show_version_flag, "v", false, "") flag.BoolVar(show_version_flag, "v", false, "")
session_name := flag.String("session", "", "Name of session file to use")
how_many := flag.Int("n", 50, "") how_many := flag.Int("n", 50, "")
flag.IntVar(how_many, "number", 50, "") flag.IntVar(how_many, "number", 50, "")
@ -86,21 +89,34 @@ func main() {
} }
profile, err = persistence.LoadProfile(*profile_dir) profile, err = persistence.LoadProfile(*profile_dir)
if err != nil { if err != nil {
die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2) die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2)
} }
if len(args) == 3 && args[0] == "login" { if *session_name != "" {
username := args[1] scraper.InitApi(profile.LoadSession(scraper.UserHandle(*session_name)))
password := args[2] // fmt.Printf("Operating as user: @%s\n", scraper.the_api.UserHandle)
} else {
login(username, password) scraper.InitApi(scraper.NewGuestSession())
} }
switch operation { switch operation {
case "create_profile": case "create_profile":
create_profile(target) create_profile(target)
case "login":
var password string
if len(args) == 2 {
fmt.Printf("Password for @%s: ", target)
bytes_password, err := term.ReadPassword(int(syscall.Stdin))
if err != nil {
panic(err)
}
fmt.Println()
password = string(bytes_password)
} else {
password = args[2]
}
login(target, password)
case "fetch_user": case "fetch_user":
fetch_user(scraper.UserHandle(target)) fetch_user(scraper.UserHandle(target))
case "download_user_content": case "download_user_content":
@ -135,6 +151,7 @@ func main() {
// - password: twitter account password // - password: twitter account password
func login(username string, password string) { func login(username string, password string) {
// Skip the scraper.the_api variable, just use a local one since no scraping is happening
api := scraper.NewGuestSession() api := scraper.NewGuestSession()
api.LogIn(username, password) api.LogIn(username, password)

2
go.mod
View File

@ -9,6 +9,6 @@ require (
github.com/mattn/go-sqlite3 v1.14.7 github.com/mattn/go-sqlite3 v1.14.7
github.com/sirupsen/logrus v1.8.1 github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0 github.com/stretchr/testify v1.7.0
golang.org/x/sys v0.3.0 // indirect golang.org/x/term v0.5.0
gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v2 v2.4.0
) )

6
go.sum
View File

@ -23,8 +23,10 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=

View File

@ -24,6 +24,18 @@ type API struct {
CSRFToken string CSRFToken string
} }
// the_api is the package-level API value that the scraper's fetch helpers call into.
//
// A global is used because the API is needed in so many utility functions (e.g.,
// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having
// to inject it everywhere.
//
// It should be set by the caller (main program) via InitApi, depending on the session file
// used. Until then it holds the zero value of API.
var the_api API
// InitApi sets the package-level API value to newApi.
// The value is stored by assignment, so later changes to the caller's variable are not seen here.
func InitApi(newApi API) {
the_api = newApi
}
type api_outstruct struct { type api_outstruct struct {
Cookies []*http.Cookie Cookies []*http.Cookie
UserHandle UserHandle UserHandle UserHandle

View File

@ -18,14 +18,13 @@ func TimestampToDateString(timestamp int) string {
* - videos * - videos
*/ */
func Search(query string, min_results int) (trove TweetTrove, err error) { func Search(query string, min_results int) (trove TweetTrove, err error) {
api := NewGuestSession() tweet_response, err := the_api.Search(query, "")
tweet_response, err := api.Search(query, "")
if err != nil { if err != nil {
return return
} }
if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" { if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" {
err = api.GetMoreTweetsFromSearch(query, &tweet_response, min_results) err = the_api.GetMoreTweetsFromSearch(query, &tweet_response, min_results)
if errors.Is(err, END_OF_FEED) { if errors.Is(err, END_OF_FEED) {
println("End of feed!") println("End of feed!")
} else if err != nil { } else if err != nil {

View File

@ -38,8 +38,7 @@ func ParseAPISpace(apiCard APICard) Space {
} }
func FetchSpaceDetail(id SpaceID) (TweetTrove, error) { func FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
api := NewGuestSession() space_response, err := the_api.GetSpace(id)
space_response, err := api.GetSpace(id)
if err != nil { if err != nil {
return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err) return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err)
} }

View File

@ -212,8 +212,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
* returns: the single Tweet * returns: the single Tweet
*/ */
func GetTweet(id TweetID) (Tweet, error) { func GetTweet(id TweetID) (Tweet, error) {
api := NewGuestSession() tweet_response, err := the_api.GetTweet(id, "")
tweet_response, err := api.GetTweet(id, "")
if err != nil { if err != nil {
return Tweet{}, fmt.Errorf("Error in API call:\n %w", err) return Tweet{}, fmt.Errorf("Error in API call:\n %w", err)
} }
@ -240,15 +239,14 @@ func GetTweet(id TweetID) (Tweet, error) {
* returns: the tweet, list of its replies and context, and users associated with those replies * returns: the tweet, list of its replies and context, and users associated with those replies
*/ */
func GetTweetFull(id TweetID) (trove TweetTrove, err error) { func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
api := NewGuestSession() tweet_response, err := the_api.GetTweet(id, "")
tweet_response, err := api.GetTweet(id, "")
if err != nil { if err != nil {
err = fmt.Errorf("Error getting tweet: %d\n %w", id, err) err = fmt.Errorf("Error getting tweet: %d\n %w", id, err)
return return
} }
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD && if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
tweet_response.GetCursor() != "" { tweet_response.GetCursor() != "" {
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD) err = the_api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
if err != nil { if err != nil {
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err) err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
return return

View File

@ -156,8 +156,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
// Calls API#GetUser and returns the parsed result // Calls API#GetUser and returns the parsed result
func GetUser(handle UserHandle) (User, error) { func GetUser(handle UserHandle) (User, error) {
api := NewGuestSession() apiUser, err := the_api.GetUser(handle)
apiUser, err := api.GetUser(handle)
if apiUser.ScreenName == "" { if apiUser.ScreenName == "" {
apiUser.ScreenName = string(handle) apiUser.ScreenName = string(handle)
} }

View File

@ -16,15 +16,14 @@ import (
* returns: a slice of Tweets, Retweets, and Users * returns: a slice of Tweets, Retweets, and Users
*/ */
func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
api := NewGuestSession() tweet_response, err := the_api.GetFeedFor(user_id, "")
tweet_response, err := api.GetFeedFor(user_id, "")
if err != nil { if err != nil {
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
return return
} }
if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" { if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets) err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
if err != nil && !errors.Is(err, END_OF_FEED) { if err != nil && !errors.Is(err, END_OF_FEED) {
return return
} }
@ -34,15 +33,14 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
} }
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) { func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
api := NewGuestSession() api_response, err := the_api.GetGraphqlFeedFor(user_id, "")
api_response, err := api.GetGraphqlFeedFor(user_id, "")
if err != nil { if err != nil {
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err) err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
return return
} }
if len(api_response.GetMainInstruction().Entries) < min_tweets && api_response.GetCursorBottom() != "" { if len(api_response.GetMainInstruction().Entries) < min_tweets && api_response.GetCursorBottom() != "" {
err = api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets) err = the_api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets)
if err != nil && !errors.Is(err, END_OF_FEED) { if err != nil && !errors.Is(err, END_OF_FEED) {
return return
} }