From 043a7fd66f8e3506127e4eed4b90638f8a4841a3 Mon Sep 17 00:00:00 2001
From: Alessio
Date: Sat, 8 Jan 2022 18:25:26 -0500
Subject: [PATCH] Add link expander

---
 scraper/link_expander.go      | 35 +++++++++++++++++++++++++++++++++++
 scraper/link_expander_test.go | 25 +++++++++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 scraper/link_expander.go
 create mode 100644 scraper/link_expander_test.go

diff --git a/scraper/link_expander.go b/scraper/link_expander.go
new file mode 100644
index 0000000..f892288
--- /dev/null
+++ b/scraper/link_expander.go
@@ -0,0 +1,35 @@
+package scraper
+
+import (
+	"fmt"
+	"net/http"
+	"time"
+)
+
+// ExpandShortUrl returns the expanded version of a short URL.  Input must be a real short URL.
+// It sends one GET without following redirects and returns the response's `Location` header.
+// Panics if the request fails, the status is not 301, or the `Location` header is missing.
+func ExpandShortUrl(short_url string) string {
+	// Create a client that doesn't follow redirects
+	client := &http.Client{
+		Timeout: 5 * time.Second,
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return http.ErrUseLastResponse
+		},
+	}
+
+	resp, err := client.Get(short_url)
+	if err != nil {
+		panic(err) // TODO: handle timeouts
+	}
+	defer resp.Body.Close() // Body is returned unclosed; close it even if a panic below fires
+	if resp.StatusCode != http.StatusMovedPermanently {
+		panic(fmt.Sprintf("Unknown status code returned when expanding short url %q: %s", short_url, resp.Status))
+	}
+
+	long_url := resp.Header.Get("Location")
+	if long_url == "" {
+		panic(fmt.Sprintf("Header didn't have a Location field for short url %q", short_url))
+	}
+	return long_url
+}
diff --git a/scraper/link_expander_test.go b/scraper/link_expander_test.go
new file mode 100644
index 0000000..ffdc1c0
--- /dev/null
+++ b/scraper/link_expander_test.go
@@ -0,0 +1,25 @@
+package scraper_test
+
+import (
+	"testing"
+
+	"net/http"
+	"net/http/httptest"
+
+	"offline_twitter/scraper"
+)
+
+
+func TestExpandShortUrl(t *testing.T) {
+	redirecting_to := "redirect target"
+	srvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		w.Header().Set("Location", redirecting_to)
+		w.WriteHeader(301)
+	}))
+	defer srvr.Close()
+
+	result := scraper.ExpandShortUrl(srvr.URL)
+	if result != redirecting_to {
+		t.Errorf("Expected %q, got %q", redirecting_to, result)
+	}
+}