Fix tweet URL parsing for URLs with trailing query parameters (the stupid '?s=20', etc.)
This commit is contained in:
parent
25fddaef9d
commit
dc24ce5ad4
@ -91,7 +91,7 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
||||
tweet_url = ExpandShortUrl(tweet_url)
|
||||
}
|
||||
|
||||
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`)
|
||||
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+(?:\?.*)?$`)
|
||||
matches := r.FindStringSubmatch(tweet_url)
|
||||
if len(matches) != 2 { // matches[0] is the full string
|
||||
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
||||
|
@ -143,14 +143,23 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
|
||||
t.Errorf("Expected handle %q, got %q", expected_user, result)
|
||||
}
|
||||
|
||||
// Test url with GET params
|
||||
result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
|
||||
expected_user = scraper.UserHandle("NerdNoticing")
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error while parsing handle: %s", err)
|
||||
}
|
||||
if result != expected_user {
|
||||
t.Errorf("Expected handle %q, got %q", expected_user, result)
|
||||
}
|
||||
// Test invalid url
|
||||
_, err = scraper.ParseHandleFromTweetUrl("awjgwekf")
|
||||
_, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
|
||||
if err == nil {
|
||||
t.Errorf("Should have produced an error for invalid URL")
|
||||
}
|
||||
|
||||
// Test empty string
|
||||
_, err = scraper.ParseHandleFromTweetUrl("awjgwekf")
|
||||
_, err = scraper.ParseHandleFromTweetUrl("")
|
||||
if err == nil {
|
||||
t.Errorf("Should have produced an error for invalid URL")
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user