Add ability to use saved sessions
This commit is contained in:
parent
933ee9a7f9
commit
c09b77f497
@ -300,12 +300,21 @@ tw unfollow cernovich
|
|||||||
test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" = "0"
|
test "$(sqlite3 twitter.db "select count(*) from users where is_followed = 1")" = "0"
|
||||||
|
|
||||||
# Testing login
|
# Testing login
|
||||||
|
# TODO authentication: use an environment var for the password
|
||||||
tw login offline_twatter S1pKIW#eRT016iA@OFcK
|
tw login offline_twatter S1pKIW#eRT016iA@OFcK
|
||||||
test -f Offline_Twatter.session
|
test -f Offline_Twatter.session
|
||||||
test "$(jq .UserHandle Offline_Twatter.session)" = "\"Offline_Twatter\""
|
test "$(jq .UserHandle Offline_Twatter.session)" = "\"Offline_Twatter\""
|
||||||
test "$(jq .IsAuthenticated Offline_Twatter.session)" = "true"
|
test "$(jq .IsAuthenticated Offline_Twatter.session)" = "true"
|
||||||
jq .CSRFToken Offline_Twatter.session | grep -P '"\w+"'
|
jq .CSRFToken Offline_Twatter.session | grep -P '"\w+"'
|
||||||
|
|
||||||
|
# When not logged in, age-restricted tweet should fail to fetch
|
||||||
|
tw fetch_user PandasAndVidya
|
||||||
|
tw fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032 || true # This one is expected to fail
|
||||||
|
# The guest fetch above is expected to fail, so the tweet (ID taken from the status URL) must be absent
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" == "0"
|
||||||
|
|
||||||
|
# Fetch an age-restricted tweet while logged in
|
||||||
|
tw --session Offline_Twatter fetch_tweet_only https://twitter.com/PandasAndVidya/status/1562714727968428032
|
||||||
|
# The authenticated fetch above should have saved the tweet (ID taken from the status URL)
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 1562714727968428032")" == "1"
|
||||||
|
|
||||||
# TODO: Maybe this file should be broken up into multiple test scripts
|
# TODO: Maybe this file should be broken up into multiple test scripts
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
Usage: twitter [--profile <profile_dir>] [<flags>] <operation> <TARGET>
|
Usage: twitter [--profile <profile_dir>] [--session <session_name>] [<flags>] <operation> <TARGET>
|
||||||
This application downloads tweets from twitter and saves them in a SQLite database.
|
This application downloads tweets from twitter and saves them in a SQLite database.
|
||||||
|
|
||||||
<profile_dir>:
|
<profile_dir>:
|
||||||
@ -6,11 +6,29 @@ This application downloads tweets from twitter and saves them in a SQLite databa
|
|||||||
By default, will use the current working directory.
|
By default, will use the current working directory.
|
||||||
Ignored if <operation> is "create_profile".
|
Ignored if <operation> is "create_profile".
|
||||||
|
|
||||||
|
<session_name>:
|
||||||
|
Optional. Only needed when making authenticated requests.
|
||||||
|
If specified, the named session file (this value with the ".session" extension appended) will be used
|
||||||
|
when making API requests.
|
||||||
|
|
||||||
<operation>:
|
<operation>:
|
||||||
create_profile
|
create_profile
|
||||||
<TARGET> is the directory to create. It must not exist already.
|
<TARGET> is the directory to create. It must not exist already.
|
||||||
<profile_dir> will be ignored if provided.
|
<profile_dir> will be ignored if provided.
|
||||||
|
|
||||||
|
login
|
||||||
|
<TARGET> is the username or email to log in as.
|
||||||
|
An optional second target can be provided which will be used as the password; if not, a password
|
||||||
|
prompt (non-echoing) will be given so you can enter the password.
|
||||||
|
On success, the `login` operation will create (or overwrite) a ".session" file named after the user
|
||||||
|
handle of the user it corresponds to, which contains session tokens and cookies allowing you to
|
||||||
|
make subsequent requests as that user.
|
||||||
|
For example, `twitter login offline_twatter` will prompt for a password, then attempt to log in as
|
||||||
|
@offline_twatter, and if successful, a file `Offline_Twatter.session` will be created.
|
||||||
|
The file name's capitalization may differ from what you typed (offline_twatter => Offline_Twatter) if that's the canonical
|
||||||
|
capitalization that twitter uses, even though handles are case insensitive.
|
||||||
|
|
||||||
|
|
||||||
fetch_user
|
fetch_user
|
||||||
download_user_content
|
download_user_content
|
||||||
<TARGET> is the user handle.
|
<TARGET> is the user handle.
|
||||||
|
@ -3,9 +3,10 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/term"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
"offline_twitter/persistence"
|
"offline_twitter/persistence"
|
||||||
"offline_twitter/scraper"
|
"offline_twitter/scraper"
|
||||||
@ -28,6 +29,8 @@ func main() {
|
|||||||
show_version_flag := flag.Bool("version", false, "")
|
show_version_flag := flag.Bool("version", false, "")
|
||||||
flag.BoolVar(show_version_flag, "v", false, "")
|
flag.BoolVar(show_version_flag, "v", false, "")
|
||||||
|
|
||||||
|
session_name := flag.String("session", "", "Name of session file to use")
|
||||||
|
|
||||||
how_many := flag.Int("n", 50, "")
|
how_many := flag.Int("n", 50, "")
|
||||||
flag.IntVar(how_many, "number", 50, "")
|
flag.IntVar(how_many, "number", 50, "")
|
||||||
|
|
||||||
@ -86,21 +89,34 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
profile, err = persistence.LoadProfile(*profile_dir)
|
profile, err = persistence.LoadProfile(*profile_dir)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2)
|
die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(args) == 3 && args[0] == "login" {
|
if *session_name != "" {
|
||||||
username := args[1]
|
scraper.InitApi(profile.LoadSession(scraper.UserHandle(*session_name)))
|
||||||
password := args[2]
|
// fmt.Printf("Operating as user: @%s\n", scraper.the_api.UserHandle)
|
||||||
|
} else {
|
||||||
login(username, password)
|
scraper.InitApi(scraper.NewGuestSession())
|
||||||
}
|
}
|
||||||
|
|
||||||
switch operation {
|
switch operation {
|
||||||
case "create_profile":
|
case "create_profile":
|
||||||
create_profile(target)
|
create_profile(target)
|
||||||
|
case "login":
|
||||||
|
var password string
|
||||||
|
if len(args) == 2 {
|
||||||
|
fmt.Printf("Password for @%s: ", target)
|
||||||
|
bytes_password, err := term.ReadPassword(int(syscall.Stdin))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
fmt.Println()
|
||||||
|
password = string(bytes_password)
|
||||||
|
} else {
|
||||||
|
password = args[2]
|
||||||
|
}
|
||||||
|
login(target, password)
|
||||||
case "fetch_user":
|
case "fetch_user":
|
||||||
fetch_user(scraper.UserHandle(target))
|
fetch_user(scraper.UserHandle(target))
|
||||||
case "download_user_content":
|
case "download_user_content":
|
||||||
@ -135,6 +151,7 @@ func main() {
|
|||||||
// - password: twitter account password
|
// - password: twitter account password
|
||||||
|
|
||||||
func login(username string, password string) {
|
func login(username string, password string) {
|
||||||
|
// Skip the scraper.the_api variable, just use a local one since no scraping is happening
|
||||||
api := scraper.NewGuestSession()
|
api := scraper.NewGuestSession()
|
||||||
api.LogIn(username, password)
|
api.LogIn(username, password)
|
||||||
|
|
||||||
|
2
go.mod
2
go.mod
@ -9,6 +9,6 @@ require (
|
|||||||
github.com/mattn/go-sqlite3 v1.14.7
|
github.com/mattn/go-sqlite3 v1.14.7
|
||||||
github.com/sirupsen/logrus v1.8.1
|
github.com/sirupsen/logrus v1.8.1
|
||||||
github.com/stretchr/testify v1.7.0
|
github.com/stretchr/testify v1.7.0
|
||||||
golang.org/x/sys v0.3.0 // indirect
|
golang.org/x/term v0.5.0
|
||||||
gopkg.in/yaml.v2 v2.4.0
|
gopkg.in/yaml.v2 v2.4.0
|
||||||
)
|
)
|
||||||
|
6
go.sum
6
go.sum
@ -23,8 +23,10 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
|
|||||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ=
|
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
|
||||||
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
|
||||||
|
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||||
|
@ -24,6 +24,18 @@ type API struct {
|
|||||||
CSRFToken string
|
CSRFToken string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use a global API variable since it is needed in so many utility functions (e.g.,
|
||||||
|
// tweet_trove.FillSpaceDetails, tweet_trove.FetchTombstoneUsers, etc.); this avoids having
|
||||||
|
// to inject it everywhere.
|
||||||
|
//
|
||||||
|
// Should be set by the caller (main program) depending on the session file used.
|
||||||
|
var the_api API
|
||||||
|
|
||||||
|
// InitApi sets the global api variable (the_api) to the given API value
|
||||||
|
func InitApi(newApi API) {
|
||||||
|
the_api = newApi
|
||||||
|
}
|
||||||
|
|
||||||
type api_outstruct struct {
|
type api_outstruct struct {
|
||||||
Cookies []*http.Cookie
|
Cookies []*http.Cookie
|
||||||
UserHandle UserHandle
|
UserHandle UserHandle
|
||||||
|
@ -18,14 +18,13 @@ func TimestampToDateString(timestamp int) string {
|
|||||||
* - videos
|
* - videos
|
||||||
*/
|
*/
|
||||||
func Search(query string, min_results int) (trove TweetTrove, err error) {
|
func Search(query string, min_results int) (trove TweetTrove, err error) {
|
||||||
api := NewGuestSession()
|
tweet_response, err := the_api.Search(query, "")
|
||||||
tweet_response, err := api.Search(query, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" {
|
if len(tweet_response.GlobalObjects.Tweets) < min_results && tweet_response.GetCursor() != "" {
|
||||||
err = api.GetMoreTweetsFromSearch(query, &tweet_response, min_results)
|
err = the_api.GetMoreTweetsFromSearch(query, &tweet_response, min_results)
|
||||||
if errors.Is(err, END_OF_FEED) {
|
if errors.Is(err, END_OF_FEED) {
|
||||||
println("End of feed!")
|
println("End of feed!")
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
@ -38,8 +38,7 @@ func ParseAPISpace(apiCard APICard) Space {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
|
func FetchSpaceDetail(id SpaceID) (TweetTrove, error) {
|
||||||
api := NewGuestSession()
|
space_response, err := the_api.GetSpace(id)
|
||||||
space_response, err := api.GetSpace(id)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err)
|
return TweetTrove{}, fmt.Errorf("Error in API call to fetch Space (id %q):\n %w", id, err)
|
||||||
}
|
}
|
||||||
|
@ -212,8 +212,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
* returns: the single Tweet
|
* returns: the single Tweet
|
||||||
*/
|
*/
|
||||||
func GetTweet(id TweetID) (Tweet, error) {
|
func GetTweet(id TweetID) (Tweet, error) {
|
||||||
api := NewGuestSession()
|
tweet_response, err := the_api.GetTweet(id, "")
|
||||||
tweet_response, err := api.GetTweet(id, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Tweet{}, fmt.Errorf("Error in API call:\n %w", err)
|
return Tweet{}, fmt.Errorf("Error in API call:\n %w", err)
|
||||||
}
|
}
|
||||||
@ -240,15 +239,14 @@ func GetTweet(id TweetID) (Tweet, error) {
|
|||||||
* returns: the tweet, list of its replies and context, and users associated with those replies
|
* returns: the tweet, list of its replies and context, and users associated with those replies
|
||||||
*/
|
*/
|
||||||
func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
|
||||||
api := NewGuestSession()
|
tweet_response, err := the_api.GetTweet(id, "")
|
||||||
tweet_response, err := api.GetTweet(id, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error getting tweet: %d\n %w", id, err)
|
err = fmt.Errorf("Error getting tweet: %d\n %w", id, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
|
if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
|
||||||
tweet_response.GetCursor() != "" {
|
tweet_response.GetCursor() != "" {
|
||||||
err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
|
err = the_api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
|
err = fmt.Errorf("Error getting more tweet replies: %d\n %w", id, err)
|
||||||
return
|
return
|
||||||
|
@ -156,8 +156,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
|||||||
|
|
||||||
// Calls API#GetUser and returns the parsed result
|
// Calls API#GetUser and returns the parsed result
|
||||||
func GetUser(handle UserHandle) (User, error) {
|
func GetUser(handle UserHandle) (User, error) {
|
||||||
api := NewGuestSession()
|
apiUser, err := the_api.GetUser(handle)
|
||||||
apiUser, err := api.GetUser(handle)
|
|
||||||
if apiUser.ScreenName == "" {
|
if apiUser.ScreenName == "" {
|
||||||
apiUser.ScreenName = string(handle)
|
apiUser.ScreenName = string(handle)
|
||||||
}
|
}
|
||||||
|
@ -16,15 +16,14 @@ import (
|
|||||||
* returns: a slice of Tweets, Retweets, and Users
|
* returns: a slice of Tweets, Retweets, and Users
|
||||||
*/
|
*/
|
||||||
func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
||||||
api := NewGuestSession()
|
tweet_response, err := the_api.GetFeedFor(user_id, "")
|
||||||
tweet_response, err := api.GetFeedFor(user_id, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
|
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
|
if len(tweet_response.GlobalObjects.Tweets) < min_tweets && tweet_response.GetCursor() != "" {
|
||||||
err = api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
|
err = the_api.GetMoreTweetsFromFeed(user_id, &tweet_response, min_tweets)
|
||||||
if err != nil && !errors.Is(err, END_OF_FEED) {
|
if err != nil && !errors.Is(err, END_OF_FEED) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -34,15 +33,14 @@ func GetUserFeedFor(user_id UserID, min_tweets int) (trove TweetTrove, err error
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
func GetUserFeedGraphqlFor(user_id UserID, min_tweets int) (trove TweetTrove, err error) {
|
||||||
api := NewGuestSession()
|
api_response, err := the_api.GetGraphqlFeedFor(user_id, "")
|
||||||
api_response, err := api.GetGraphqlFeedFor(user_id, "")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
|
err = fmt.Errorf("Error calling API to fetch user feed: UserID %d\n %w", user_id, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(api_response.GetMainInstruction().Entries) < min_tweets && api_response.GetCursorBottom() != "" {
|
if len(api_response.GetMainInstruction().Entries) < min_tweets && api_response.GetCursorBottom() != "" {
|
||||||
err = api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets)
|
err = the_api.GetMoreTweetsFromGraphqlFeed(user_id, &api_response, min_tweets)
|
||||||
if err != nil && !errors.Is(err, END_OF_FEED) {
|
if err != nil && !errors.Is(err, END_OF_FEED) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user