diff --git a/go.mod b/go.mod index 89a4432..999651a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,8 @@ module offline_twitter go 1.16 require ( - github.com/go-test/deep v1.0.7 // indirect + github.com/go-test/deep v1.0.7 + github.com/jarcoal/httpmock v1.1.0 github.com/mattn/go-sqlite3 v1.14.7 - gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v2 v2.4.0 ) diff --git a/go.sum b/go.sum index 59f45c4..2b13897 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,10 @@ github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M= github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= +github.com/jarcoal/httpmock v1.1.0 h1:F47ChZj1Y2zFsCXxNkBPwNNKnAyOATcdQibk0qEdVCE= +github.com/jarcoal/httpmock v1.1.0/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik= github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA= github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/scraper/user.go b/scraper/user.go index 1d1b225..cf5f0d6 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -86,7 +86,12 @@ Joined %s * If tweet url is not valid, return an error. 
*/ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) { - r := regexp.MustCompile(`https://twitter.com/(\w+)/status/\d+`) + short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`) + if short_url_regex.MatchString(tweet_url) { + tweet_url = ExpandShortUrl(tweet_url) + } + + r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`) matches := r.FindStringSubmatch(tweet_url) if len(matches) != 2 { // matches[0] is the full string return "", fmt.Errorf("Invalid tweet url: %s", tweet_url) diff --git a/scraper/user_test.go b/scraper/user_test.go index 54a56e9..67cee6f 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -1,9 +1,12 @@ package scraper_test import ( + "testing" "encoding/json" "io/ioutil" - "testing" + "net/http" + + "github.com/jarcoal/httpmock" "offline_twitter/scraper" ) @@ -152,3 +155,35 @@ func TestParseHandleFromTweetUrl(t *testing.T) { t.Errorf("Should have produced an error for invalid URL") } } + + +/** + * Should extract a user handle from a shortened tweet URL + */ +func TestParseHandleFromShortenedTweetUrl(t *testing.T) { + short_url := "https://t.co/rZVrNGJyDe" + expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529" + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) { + header := http.Header{} + header.Set("Location", expanded_url) + return &http.Response{StatusCode: 301, Header: header}, nil + }) + + // Check the httpmock interceptor is working correctly + if scraper.ExpandShortUrl(short_url) != expanded_url { + t.Fatalf("httpmock didn't intercept the request") + } + + result, err := scraper.ParseHandleFromTweetUrl(short_url) + if err != nil { + t.Errorf(err.Error()) + } + expected_user := scraper.UserHandle("MarkSnyderJr1") + if result != expected_user { + t.Errorf("Expected user %q, got %q", expected_user, result) + } +}