Add Url type
This commit is contained in:
parent
06831dfb52
commit
3f9c807efa
@ -32,6 +32,35 @@ type APIExtendedMedia struct {
|
||||
} `json:"video_info"`
|
||||
}
|
||||
|
||||
// APICard mirrors the "card" object that APITweet decodes from a tweet's JSON.
//
// Only the binding values this package reads are declared; encoding/json
// ignores any other keys present in the payload. A binding value that is
// missing from the payload leaves the corresponding field at its zero value.
type APICard struct {
	BindingValues struct {
		// Domain is decoded from "domain"."string_value".
		Domain struct {
			Value string `json:"string_value"`
		} `json:"domain"`

		// Creator is decoded from "creator"."user_value"; the ID arrives as
		// a JSON string ("id_str") and is converted to an int64.
		Creator struct {
			UserValue struct {
				Value int64 `json:"id_str,string"`
			} `json:"user_value"`
		} `json:"creator"`

		// Site is decoded from "site"."user_value", with the same string-to-int64
		// conversion as Creator.
		Site struct {
			UserValue struct {
				Value int64 `json:"id_str,string"`
			} `json:"user_value"`
		} `json:"site"`

		// Title is decoded from "title"."string_value".
		Title struct {
			Value string `json:"string_value"`
		} `json:"title"`

		// Description is decoded from "description"."string_value".
		Description struct {
			Value string `json:"string_value"`
		} `json:"description"`

		// Thumbnail is decoded from "thumbnail_image_large"."image_value".
		Thumbnail struct {
			ImageValue struct {
				Url string `json:"url"`
			} `json:"image_value"`
		} `json:"thumbnail_image_large"`
	} `json:"binding_values"`
}
|
||||
|
||||
type APITweet struct {
|
||||
ID int64 `json:"id_str,string"`
|
||||
ConversationID int64 `json:"conversation_id_str,string"`
|
||||
@ -66,6 +95,7 @@ type APITweet struct {
|
||||
QuotedStatusID int64
|
||||
Time time.Time `json:"time"`
|
||||
UserID int64 `json:"user_id_str,string"`
|
||||
Card APICard `json:"card"`
|
||||
}
|
||||
|
||||
func (t *APITweet) NormalizeContent() {
|
||||
|
1
scraper/test_responses/url_card.json
Normal file
1
scraper/test_responses/url_card.json
Normal file
File diff suppressed because one or more lines are too long
47
scraper/url.go
Normal file
47
scraper/url.go
Normal file
@ -0,0 +1,47 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
type Url struct {
|
||||
Domain string
|
||||
Text string
|
||||
Title string
|
||||
Description string
|
||||
ThumbnailRemoteUrl string
|
||||
ThumbnailLocalPath string
|
||||
CreatorID UserID
|
||||
SiteID UserID
|
||||
|
||||
IsContentDownloaded bool
|
||||
}
|
||||
|
||||
func ParseAPIUrlCard(apiCard APICard) Url {
|
||||
values := apiCard.BindingValues
|
||||
return Url{
|
||||
Domain: values.Domain.Value,
|
||||
Title: values.Title.Value,
|
||||
Description: values.Description.Value,
|
||||
ThumbnailRemoteUrl: values.Thumbnail.ImageValue.Url,
|
||||
ThumbnailLocalPath: get_thumbnail_local_path(values.Thumbnail.ImageValue.Url),
|
||||
CreatorID: UserID(values.Creator.UserValue.Value),
|
||||
SiteID: UserID(values.Site.UserValue.Value),
|
||||
IsContentDownloaded: false,
|
||||
}
|
||||
}
|
||||
|
||||
// get_thumbnail_local_path derives the local file name for a card thumbnail
// from its remote URL.
//
// The name is the last element of the URL path, followed by "_<name>" and
// ".<format>" taken from the query parameters of those names. For example,
// ".../card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600" gives
// "odDi9EqO_600x600.jpg".
//
// A query parameter that is absent is left out of the result rather than
// being indexed blindly, and an empty remote_url yields "".
//
// It panics if remote_url or its query string cannot be parsed.
func get_thumbnail_local_path(remote_url string) string {
	// Nothing to name: path.Base("") would otherwise produce ".".
	if remote_url == "" {
		return ""
	}

	u, err := url.Parse(remote_url)
	if err != nil {
		panic(err)
	}
	query_params, err := url.ParseQuery(u.RawQuery)
	if err != nil {
		panic(err)
	}

	local_path := path.Base(u.Path)
	// Check for presence (not emptiness) so that every URL that carried both
	// parameters keeps producing exactly the same file name as before.
	if names := query_params["name"]; len(names) > 0 {
		local_path = fmt.Sprintf("%s_%s", local_path, names[0])
	}
	if formats := query_params["format"]; len(formats) > 0 {
		local_path = fmt.Sprintf("%s.%s", local_path, formats[0])
	}
	return local_path
}
|
54
scraper/url_test.go
Normal file
54
scraper/url_test.go
Normal file
@ -0,0 +1,54 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"io/ioutil"
|
||||
"encoding/json"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
func TestParseAPIUrlCard(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/url_card.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var apiCard scraper.APICard
|
||||
err = json.Unmarshal(data, &apiCard)
|
||||
if err != nil {
|
||||
t.Fatal(err.Error())
|
||||
}
|
||||
url := scraper.ParseAPIUrlCard(apiCard)
|
||||
|
||||
expected_domain := "reason.com"
|
||||
if url.Domain != expected_domain {
|
||||
t.Errorf("Expected %q, got %q", expected_domain, url.Domain)
|
||||
}
|
||||
expected_title := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'"
|
||||
if url.Title != expected_title {
|
||||
t.Errorf("Expected %q, got %q", expected_title, url.Title)
|
||||
}
|
||||
expected_description := "\"It’s OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\""
|
||||
if url.Description != expected_description {
|
||||
t.Errorf("Expected %q, got %q", expected_description, url.Description)
|
||||
}
|
||||
expected_remote_url := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
|
||||
if url.ThumbnailRemoteUrl != expected_remote_url {
|
||||
t.Errorf("Expected %q, got %q", expected_remote_url, url.ThumbnailRemoteUrl)
|
||||
}
|
||||
expected_local_filename := "odDi9EqO_600x600.jpg"
|
||||
if url.ThumbnailLocalPath != expected_local_filename {
|
||||
t.Errorf("Expected %q, got %q", expected_local_filename, url.ThumbnailLocalPath)
|
||||
}
|
||||
expected_creator_id := scraper.UserID(155581583)
|
||||
if url.CreatorID != expected_creator_id {
|
||||
t.Errorf("Expected %d, got %d", expected_creator_id, url.CreatorID)
|
||||
}
|
||||
expected_site_id := scraper.UserID(16467567)
|
||||
if url.SiteID != expected_site_id {
|
||||
t.Errorf("Expected %d, got %d", expected_site_id, url.SiteID)
|
||||
}
|
||||
if url.IsContentDownloaded {
|
||||
t.Errorf("Expected it not to be downloaded, but it was")
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user