From dc24ce5ad4734e4a61e9ef06e8e5982e0a755cbc Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 10 Jan 2022 11:41:03 -0500 Subject: [PATCH] Fix tweet URL parsing for URLs with query parameters such as '?s=20' --- scraper/user.go | 2 +- scraper/user_test.go | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/scraper/user.go b/scraper/user.go index cf5f0d6..8cf37c6 100644 --- a/scraper/user.go +++ b/scraper/user.go @@ -91,7 +91,7 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) { tweet_url = ExpandShortUrl(tweet_url) } - r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`) + r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+(?:\?.*)?$`) matches := r.FindStringSubmatch(tweet_url) if len(matches) != 2 { // matches[0] is the full string return "", fmt.Errorf("Invalid tweet url: %s", tweet_url) diff --git a/scraper/user_test.go b/scraper/user_test.go index 67cee6f..f18a62d 100644 --- a/scraper/user_test.go +++ b/scraper/user_test.go @@ -143,14 +143,23 @@ func TestParseHandleFromTweetUrl(t *testing.T) { t.Errorf("Expected handle %q, got %q", expected_user, result) } + // Test url with GET params + result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20") + expected_user = scraper.UserHandle("NerdNoticing") + if err != nil { + t.Errorf("Unexpected error while parsing handle: %s", err) + } + if result != expected_user { + t.Errorf("Expected handle %q, got %q", expected_user, result) + } // Test invalid url - _, err = scraper.ParseHandleFromTweetUrl("awjgwekf") + _, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20") if err == nil { t.Errorf("Should have produced an error for invalid URL") } // Test empty string - _, err = scraper.ParseHandleFromTweetUrl("awjgwekf") + _, err = scraper.ParseHandleFromTweetUrl("") if err == nil { t.Errorf("Should have produced an error for invalid URL") }