Integrate url expander into Tweet URL parser
This commit is contained in:
parent
043a7fd66f
commit
d9f9c9de83
5
go.mod
5
go.mod
@ -3,7 +3,8 @@ module offline_twitter
|
||||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/go-test/deep v1.0.7 // indirect
|
||||
github.com/go-test/deep v1.0.7
|
||||
github.com/jarcoal/httpmock v1.1.0
|
||||
github.com/mattn/go-sqlite3 v1.14.7
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
)
|
||||
|
3
go.sum
3
go.sum
@ -1,7 +1,10 @@
|
||||
github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M=
|
||||
github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8=
|
||||
github.com/jarcoal/httpmock v1.1.0 h1:F47ChZj1Y2zFsCXxNkBPwNNKnAyOATcdQibk0qEdVCE=
|
||||
github.com/jarcoal/httpmock v1.1.0/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik=
|
||||
github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA=
|
||||
github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
|
@ -86,7 +86,12 @@ Joined %s
|
||||
* If tweet url is not valid, return an error.
|
||||
*/
|
||||
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
||||
r := regexp.MustCompile(`https://twitter.com/(\w+)/status/\d+`)
|
||||
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
|
||||
if short_url_regex.MatchString(tweet_url) {
|
||||
tweet_url = ExpandShortUrl(tweet_url)
|
||||
}
|
||||
|
||||
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`)
|
||||
matches := r.FindStringSubmatch(tweet_url)
|
||||
if len(matches) != 2 { // matches[0] is the full string
|
||||
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
||||
|
@ -1,9 +1,12 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
"net/http"
|
||||
|
||||
"github.com/jarcoal/httpmock"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
@ -152,3 +155,35 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
|
||||
t.Errorf("Should have produced an error for invalid URL")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Should extract a user handle from a shortened tweet URL
|
||||
*/
|
||||
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
|
||||
short_url := "https://t.co/rZVrNGJyDe"
|
||||
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
|
||||
|
||||
httpmock.Activate()
|
||||
defer httpmock.DeactivateAndReset()
|
||||
|
||||
httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) {
|
||||
header := http.Header{}
|
||||
header.Set("Location", expanded_url)
|
||||
return &http.Response{StatusCode: 301, Header: header}, nil
|
||||
})
|
||||
|
||||
// Check the httmock interceptor is working correctly
|
||||
if scraper.ExpandShortUrl(short_url) != expanded_url {
|
||||
t.Fatalf("httpmock didn't intercept the request")
|
||||
}
|
||||
|
||||
result, err := scraper.ParseHandleFromTweetUrl(short_url)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
expected_user := scraper.UserHandle("MarkSnyderJr1")
|
||||
if result != expected_user {
|
||||
t.Errorf("Expected user %q, got %q", expected_user, result)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user