Integrate URL expander into Tweet URL parser

This commit is contained in:
Alessio 2022-01-08 18:36:23 -05:00
parent 043a7fd66f
commit d9f9c9de83
4 changed files with 48 additions and 4 deletions

5
go.mod
View File

@ -3,7 +3,8 @@ module offline_twitter
go 1.16
require (
github.com/go-test/deep v1.0.7 // indirect
github.com/go-test/deep v1.0.7
github.com/jarcoal/httpmock v1.1.0
github.com/mattn/go-sqlite3 v1.14.7
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v2 v2.4.0
)

3
go.sum
View File

@ -1,7 +1,10 @@
github.com/go-test/deep v1.0.7 h1:/VSMRlnY/JSyqxQUzQLKVMAskpY/NZKFA5j2P+0pP2M=
github.com/go-test/deep v1.0.7/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8=
github.com/jarcoal/httpmock v1.1.0 h1:F47ChZj1Y2zFsCXxNkBPwNNKnAyOATcdQibk0qEdVCE=
github.com/jarcoal/httpmock v1.1.0/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik=
github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA=
github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

View File

@ -86,7 +86,12 @@ Joined %s
* If tweet url is not valid, return an error.
*/
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
r := regexp.MustCompile(`https://twitter.com/(\w+)/status/\d+`)
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
if short_url_regex.MatchString(tweet_url) {
tweet_url = ExpandShortUrl(tweet_url)
}
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`)
matches := r.FindStringSubmatch(tweet_url)
if len(matches) != 2 { // matches[0] is the full string
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)

View File

@ -1,9 +1,12 @@
package scraper_test
import (
"testing"
"encoding/json"
"io/ioutil"
"testing"
"net/http"
"github.com/jarcoal/httpmock"
"offline_twitter/scraper"
)
@ -152,3 +155,35 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
t.Errorf("Should have produced an error for invalid URL")
}
}
/**
 * Should extract a user handle from a shortened tweet URL.
 *
 * A mock responder is registered for the short URL; it replies with a 301 whose
 * `Location` header is the full tweet URL.  The test first checks that
 * `scraper.ExpandShortUrl` actually sees that mocked redirect, then checks that
 * `scraper.ParseHandleFromTweetUrl` returns the handle when given the short URL.
 */
func TestParseHandleFromShortenedTweetUrl(t *testing.T) {
	short_url := "https://t.co/rZVrNGJyDe"
	expanded_url := "https://twitter.com/MarkSnyderJr1/status/1460857606147350529"

	// Install the mock for the duration of this test only
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", short_url, func(req *http.Request) (*http.Response, error) {
		header := http.Header{}
		header.Set("Location", expanded_url)
		return &http.Response{StatusCode: 301, Header: header}, nil
	})

	// Check the httpmock interceptor is working correctly
	if scraper.ExpandShortUrl(short_url) != expanded_url {
		t.Fatalf("httpmock didn't intercept the request")
	}

	result, err := scraper.ParseHandleFromTweetUrl(short_url)
	if err != nil {
		// Use a constant format string: the error text may contain `%` (it embeds a URL).
		// Stop here, since `result` carries no useful value once an error was returned.
		t.Fatalf("Unexpected error parsing %q: %v", short_url, err)
	}

	expected_user := scraper.UserHandle("MarkSnyderJr1")
	if result != expected_user {
		t.Errorf("Expected user %q, got %q", expected_user, result)
	}
}