Fix tweet URL parsing for URLs that carry query parameters (e.g. '?s=20')

This commit is contained in:
Alessio 2022-01-10 11:41:03 -05:00
parent 25fddaef9d
commit dc24ce5ad4
2 changed files with 12 additions and 3 deletions

View File

@@ -91,7 +91,7 @@ func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
tweet_url = ExpandShortUrl(tweet_url) tweet_url = ExpandShortUrl(tweet_url)
} }
r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+$`) r := regexp.MustCompile(`^https://twitter.com/(\w+)/status/\d+(?:\?.*)?$`)
matches := r.FindStringSubmatch(tweet_url) matches := r.FindStringSubmatch(tweet_url)
if len(matches) != 2 { // matches[0] is the full string if len(matches) != 2 { // matches[0] is the full string
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url) return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)

View File

@@ -143,14 +143,23 @@ func TestParseHandleFromTweetUrl(t *testing.T) {
t.Errorf("Expected handle %q, got %q", expected_user, result) t.Errorf("Expected handle %q, got %q", expected_user, result)
} }
// Test url with GET params
result, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720?s=20")
expected_user = scraper.UserHandle("NerdNoticing")
if err != nil {
t.Errorf("Unexpected error while parsing handle: %s", err)
}
if result != expected_user {
t.Errorf("Expected handle %q, got %q", expected_user, result)
}
// Test invalid url // Test invalid url
_, err = scraper.ParseHandleFromTweetUrl("awjgwekf") _, err = scraper.ParseHandleFromTweetUrl("https://twitter.com/NerdNoticing/status/1263192389050654720s=20")
if err == nil { if err == nil {
t.Errorf("Should have produced an error for invalid URL") t.Errorf("Should have produced an error for invalid URL")
} }
// Test empty string // Test empty string
_, err = scraper.ParseHandleFromTweetUrl("awjgwekf") _, err = scraper.ParseHandleFromTweetUrl("")
if err == nil { if err == nil {
t.Errorf("Should have produced an error for invalid URL") t.Errorf("Should have produced an error for invalid URL")
} }