Integrate url expander into Tweet URL parser
This commit is contained in:
parent
043a7fd66f
commit
d9f9c9de83
5
go.mod
5
go.mod
@ -3,7 +3,8 @@ module offline_twitter
|
|||||||
go 1.16
|
go 1.16
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-test/deep v1.0.7 // indirect
|
github.com/go-test/deep v1.0.7
|
||||||
|
github.com/jarcoal/httpmock v1.1.0
|
||||||
github.com/mattn/go-sqlite3 v1.14.7
|
github.com/mattn/go-sqlite3 v1.14.7
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
gopkg.in/yaml.v2 v2.4.0
|
||||||
)
|
)
|
||||||
|
3
go.sum
3
go.sum
@ -1,7 +1,10 @@
|
|||||||
github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M=
|
github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M=
|
||||||
github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8=
|
github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8=
|
||||||
|
github.com/jarcoal/httpmock v1.1.0 h1:F47ChZj1Y2zFsCXxNkBPwNNKnAyOATcdQibk0qEdVCE=
|
||||||
|
github.com/jarcoal/httpmock v1.1.0/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik=
|
||||||
github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA=
|
github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA=
|
||||||
github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
|
github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||||
|
@ -86,7 +86,12 @@ Joined %s
|
|||||||
* If tweet url is not valid, return an error.
|
* If tweet url is not valid, return an error.
|
||||||
*/
|
*/
|
||||||
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
||||||
r := regexp.MustCompile(`https://twitter.com/(\w+)/status/\d+`)
|
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
|
||||||
|
if short_url_regex.MatchString(tweet_url) {
|
||||||
|
tweet_url = ExpandShortUrl(tweet_url)
|
||||||
|
}
|
||||||
|
|
||||||
|
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`)
|
||||||
matches := r.FindStringSubmatch(tweet_url)
|
matches := r.FindStringSubmatch(tweet_url)
|
||||||
if len(matches) != 2 { // matches[0] is the full string
|
if len(matches) != 2 { // matches[0] is the full string
|
||||||
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"testing"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"testing"
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/jarcoal/httpmock"
|
||||||
|
|
||||||
"offline_twitter/scraper"
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -152,3 +155,35 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
|
|||||||
t.Errorf("Should have produced an error for invalid URL")
|
t.Errorf("Should have produced an error for invalid URL")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should extract a user handle from a shortened tweet URL
|
||||||
|
*/
|
||||||
|
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
|
||||||
|
short_url := "https://t.co/rZVrNGJyDe"
|
||||||
|
expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"
|
||||||
|
|
||||||
|
httpmock.Activate()
|
||||||
|
defer httpmock.DeactivateAndReset()
|
||||||
|
|
||||||
|
httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) {
|
||||||
|
header := http.Header{}
|
||||||
|
header.Set("Location", expanded_url)
|
||||||
|
return &http.Response{StatusCode: 301, Header: header}, nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check that the httpmock interceptor is working correctly
|
||||||
|
if scraper.ExpandShortUrl(short_url) != expanded_url {
|
||||||
|
t.Fatalf("httpmock didn't intercept the request")
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := scraper.ParseHandleFromTweetUrl(short_url)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf(err.Error())
|
||||||
|
}
|
||||||
|
expected_user := scraper.UserHandle("MarkSnyderJr1")
|
||||||
|
if result != expected_user {
|
||||||
|
t.Errorf("Expected user %q, got %q", expected_user, result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user