Add Url type

2021-09-17 13:41:43 -07:00 · 2021-09-17 13:41:43 -07:00 · 3f9c807efa
commit 3f9c807efa
parent 06831dfb52
4 changed files with 132 additions and 0 deletions
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@ -32,6 +32,35 @@ type APIExtendedMedia struct {
 	} `json:"video_info"`
 }
 // APICard mirrors the JSON layout of a link-preview "card" object so that it
 // can be filled in by encoding/json. All fields of interest are nested under
 // the "binding_values" key; each nested anonymous struct corresponds to one
 // level of the JSON document.
 type APICard struct {
 	BindingValues struct {
 		// Domain is read from binding_values.domain.string_value.
 		Domain struct {
 			Value string `json:"string_value"`
 		} `json:"domain"`
 		// Creator is read from binding_values.creator.user_value.id_str.
 		// The ",string" tag option decodes the quoted JSON number into an int64.
 		Creator struct {
 			UserValue struct {
 				Value int64 `json:"id_str,string"`
 			} `json:"user_value"`
 		} `json:"creator"`
 		// Site is read from binding_values.site.user_value.id_str, decoded the
 		// same way as Creator.
 		Site struct {
 			UserValue struct {
 				Value int64 `json:"id_str,string"`
 			} `json:"user_value"`
 		} `json:"site"`
 		// Title is read from binding_values.title.string_value.
 		Title struct {
 			Value string `json:"string_value"`
 		} `json:"title"`
 		// Description is read from binding_values.description.string_value.
 		Description struct {
 			Value string `json:"string_value"`
 		} `json:"description"`
 		// Thumbnail is read from binding_values.thumbnail_image_large.image_value.url
 		// (note the JSON key differs from the Go field name).
 		Thumbnail struct {
 			ImageValue struct {
 				Url string `json:"url"`
 			} `json:"image_value"`
 		} `json:"thumbnail_image_large"`
 	} `json:"binding_values"`
 }
 type APITweet struct {
 	ID                int64  `json:"id_str,string"`
 	ConversationID    int64  `json:"conversation_id_str,string"`
@ -66,6 +95,7 @@ type APITweet struct {
 	QuotedStatusID       int64
 	Time                 time.Time `json:"time"`
 	UserID               int64     `json:"user_id_str,string"`
 	Card                 APICard   `json:"card"`
 }
 func (t *APITweet) NormalizeContent() {
--- a/scraper/test_responses/url_card.json
+++ b/scraper/test_responses/url_card.json
--- a/scraper/url.go
+++ b/scraper/url.go
@ -0,0 +1,47 @@
 package scraper
 import (
 	"fmt"
 	"path"
 	"net/url"
 )
 // Url is the flattened representation of a link-preview card, as produced by
 // ParseAPIUrlCard from an APICard.
 type Url struct {
 	// Domain is the card's "domain" binding value.
 	Domain string
 	// Text is not populated by ParseAPIUrlCard; it is left as the zero value there.
 	Text string
 	// Title is the card's "title" binding value.
 	Title string
 	// Description is the card's "description" binding value.
 	Description string
 	// ThumbnailRemoteUrl is the thumbnail image URL exactly as given in the card.
 	ThumbnailRemoteUrl string
 	// ThumbnailLocalPath is the file name derived from ThumbnailRemoteUrl by
 	// get_thumbnail_local_path ("<basename>_<name>.<format>").
 	ThumbnailLocalPath string
 	// CreatorID is the card's "creator" user ID, converted to UserID.
 	CreatorID UserID
 	// SiteID is the card's "site" user ID, converted to UserID.
 	SiteID UserID
 	// IsContentDownloaded is set to false by ParseAPIUrlCard.
 	// NOTE(review): presumably flipped to true once the thumbnail has been
 	// saved locally -- confirm against the code that downloads it.
 	IsContentDownloaded bool
 }
 // ParseAPIUrlCard flattens the nested binding values of an APICard into a Url.
 //
 // The thumbnail's local file name is derived from its remote URL via
 // get_thumbnail_local_path. Text is left empty and IsContentDownloaded is
 // false on the returned value.
 func ParseAPIUrlCard(apiCard APICard) Url {
 	bindings := apiCard.BindingValues
 	// Read the thumbnail URL once; it feeds both the remote and local fields.
 	thumbnail_url := bindings.Thumbnail.ImageValue.Url
 	var result Url
 	result.Domain = bindings.Domain.Value
 	result.Title = bindings.Title.Value
 	result.Description = bindings.Description.Value
 	result.ThumbnailRemoteUrl = thumbnail_url
 	result.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
 	result.CreatorID = UserID(bindings.Creator.UserValue.Value)
 	result.SiteID = UserID(bindings.Site.UserValue.Value)
 	result.IsContentDownloaded = false
 	return result
 }
 // get_thumbnail_local_path derives the local file name for a card thumbnail
 // from its remote URL.
 //
 // The result has the form "<basename>_<name>.<format>", where <basename> is the
 // last element of the URL path and <name> / <format> are the first values of
 // the "name" and "format" query parameters. For example,
 // ".../card_img/123/odDi9EqO?format=jpg&name=600x600" yields
 // "odDi9EqO_600x600.jpg".
 //
 // An empty remote_url (a card without a thumbnail) yields an empty path.
 // The function panics if the URL or its query string cannot be parsed, or if
 // either the "name" or "format" query parameter is absent.
 func get_thumbnail_local_path(remote_url string) string {
     // No thumbnail means no local file; don't try to pick apart an empty URL.
     if remote_url == "" {
         return ""
     }
     u, err := url.Parse(remote_url)
     if err != nil {
         panic(err)
     }
     query_params, err := url.ParseQuery(u.RawQuery)
     if err != nil {
         panic(err)
     }
     // Check for presence explicitly so a missing parameter produces a message
     // naming the offending URL rather than a bare index-out-of-range panic.
     names, formats := query_params["name"], query_params["format"]
     if len(names) == 0 || len(formats) == 0 {
         panic(fmt.Sprintf("thumbnail url %q is missing the \"name\" or \"format\" query parameter", remote_url))
     }
     return fmt.Sprintf("%s_%s.%s", path.Base(u.Path), names[0], formats[0])
 }
--- a/scraper/url_test.go
+++ b/scraper/url_test.go
@ -0,0 +1,54 @@
 package scraper_test
 import (
    "testing"
    "io/ioutil"
    "encoding/json"
    "offline_twitter/scraper"
 )
 // TestParseAPIUrlCard decodes the url_card.json fixture into an APICard, runs
 // it through ParseAPIUrlCard, and checks every field of the resulting Url
 // that the parser populates.
 func TestParseAPIUrlCard(t *testing.T) {
     data, err := ioutil.ReadFile("test_responses/url_card.json")
     if err != nil {
         // Fail only this test rather than panicking, which would abort the
         // whole test binary; this also matches the JSON error handling below.
         t.Fatalf("reading test fixture: %v", err)
     }
     var apiCard scraper.APICard
     err = json.Unmarshal(data, &apiCard)
     if err != nil {
         t.Fatal(err.Error())
     }
     url := scraper.ParseAPIUrlCard(apiCard)
     expected_domain := "reason.com"
     if url.Domain != expected_domain {
         t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
     }
     expected_title := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'"
     if url.Title != expected_title {
         t.Errorf("Expected %q, got %q", expected_title, url.Title)
     }
     expected_description := "\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\""
     if url.Description != expected_description {
         t.Errorf("Expected %q, got %q", expected_description, url.Description)
     }
     expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
     if url.ThumbnailRemoteUrl != expected_remote_url {
         t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
     }
     // The local file name is "<path basename>_<name>.<format>" from the remote URL.
     expected_local_filename := "odDi9EqO_600x600.jpg"
     if url.ThumbnailLocalPath != expected_local_filename {
         t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
     }
     expected_creator_id := scraper.UserID(155581583)
     if url.CreatorID != expected_creator_id {
         t.Errorf("Expected %d, got %d", expected_creator_id, url.CreatorID)
     }
     expected_site_id := scraper.UserID(16467567)
     if url.SiteID != expected_site_id {
         t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
     }
     // A freshly parsed card must not be marked as downloaded.
     if url.IsContentDownloaded {
         t.Errorf("Expected it not to be downloaded, but it was")
     }
 }