offline-twitter/scraper/url_test.go

187 lines
7.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package scraper_test
import (
"testing"
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
)
// TestParseAPIUrlCard loads the url_card.json fixture, unmarshals it into a
// scraper.APICard, runs it through scraper.ParseAPIUrlCard, and checks the
// domain, title, description, thumbnail dimensions and locations, creator and
// site IDs, and the HasThumbnail / IsContentDownloaded flags of the result.
func TestParseAPIUrlCard(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/url_card.json")
	if err != nil {
		// Fail this test only; a panic would abort the entire test binary.
		t.Fatalf("reading test fixture: %v", err)
	}

	var apiCard scraper.APICard
	if err := json.Unmarshal(data, &apiCard); err != nil {
		t.Fatalf("unmarshalling test fixture: %v", err)
	}

	url := scraper.ParseAPIUrlCard(apiCard)

	wantDomain := "reason.com"
	if url.Domain != wantDomain {
		t.Errorf("Expected %q, got %q", wantDomain, url.Domain)
	}

	wantTitle := "L.A. Teachers Union Leader: 'There's No Such Thing As Learning Loss'"
	if url.Title != wantTitle {
		t.Errorf("Expected %q, got %q", wantTitle, url.Title)
	}

	// NOTE(review): "Its" carries no apostrophe here — confirm it matches the fixture text exactly.
	wantDescription := "\"Its OK that our babies may not have learned all their times tables,\" says Cecily Myart-Cruz. \"They learned resilience.\""
	if url.Description != wantDescription {
		t.Errorf("Expected %q, got %q", wantDescription, url.Description)
	}

	wantWidth := 600
	if url.ThumbnailWidth != wantWidth {
		t.Errorf("Expected thumbnail width %d, got %d", wantWidth, url.ThumbnailWidth)
	}

	wantHeight := 315
	if url.ThumbnailHeight != wantHeight {
		t.Errorf("Expected thumbnail height %d, got %d", wantHeight, url.ThumbnailHeight)
	}

	wantRemoteURL := "https://pbs.twimg.com/card_img/1434998862305968129/odDi9EqO?format=jpg&name=600x600"
	if url.ThumbnailRemoteUrl != wantRemoteURL {
		t.Errorf("Expected %q, got %q", wantRemoteURL, url.ThumbnailRemoteUrl)
	}

	wantLocalFilename := "odDi9EqO_600x600.jpg"
	if url.ThumbnailLocalPath != wantLocalFilename {
		t.Errorf("Expected %q, got %q", wantLocalFilename, url.ThumbnailLocalPath)
	}

	wantCreatorID := scraper.UserID(155581583)
	if url.CreatorID != wantCreatorID {
		t.Errorf("Expected %d, got %d", wantCreatorID, url.CreatorID)
	}

	wantSiteID := scraper.UserID(16467567)
	if url.SiteID != wantSiteID {
		t.Errorf("Expected %d, got %d", wantSiteID, url.SiteID)
	}

	if !url.HasThumbnail {
		t.Errorf("Should have a thumbnail, but it doesn't")
	}
	// Parsing alone should not mark the content as downloaded.
	if url.IsContentDownloaded {
		t.Errorf("Expected it not to be downloaded, but it was")
	}
}
// TestParseAPIUrlCardWithPlayer loads the url_card_with_player.json fixture,
// unmarshals it into a scraper.APICard, runs it through
// scraper.ParseAPIUrlCard, and checks the domain, title, description,
// thumbnail locations, site ID, and the HasThumbnail / IsContentDownloaded
// flags of the result.
func TestParseAPIUrlCardWithPlayer(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player.json")
	if err != nil {
		// Fail this test only; a panic would abort the entire test binary.
		t.Fatalf("reading test fixture: %v", err)
	}

	var apiCard scraper.APICard
	if err := json.Unmarshal(data, &apiCard); err != nil {
		t.Fatalf("unmarshalling test fixture: %v", err)
	}

	url := scraper.ParseAPIUrlCard(apiCard)

	wantDomain := "www.youtube.com"
	if url.Domain != wantDomain {
		t.Errorf("Expected %q, got %q", wantDomain, url.Domain)
	}

	wantTitle := "The Politically Incorrect Guide to the Constitution (Starring Tom..."
	if url.Title != wantTitle {
		t.Errorf("Expected %q, got %q", wantTitle, url.Title)
	}

	wantDescription := "Watch this episode on LBRY/Odysee: https://odysee.com/@capitalresearch:5/the-politically-incorrect-guide-to-the:8Watch this episode on Rumble: https://rumble..."
	if url.Description != wantDescription {
		t.Errorf("Expected %q, got %q", wantDescription, url.Description)
	}

	wantRemoteURL := "https://pbs.twimg.com/card_img/1437849456423194639/_1t0btyt?format=jpg&name=800x320_1"
	if url.ThumbnailRemoteUrl != wantRemoteURL {
		t.Errorf("Expected %q, got %q", wantRemoteURL, url.ThumbnailRemoteUrl)
	}

	wantLocalFilename := "_1t0btyt_800x320_1.jpg"
	if url.ThumbnailLocalPath != wantLocalFilename {
		t.Errorf("Expected %q, got %q", wantLocalFilename, url.ThumbnailLocalPath)
	}

	wantSiteID := scraper.UserID(10228272)
	if url.SiteID != wantSiteID {
		t.Errorf("Expected %d, got %d", wantSiteID, url.SiteID)
	}

	if !url.HasThumbnail {
		t.Errorf("Should have a thumbnail, but it doesn't")
	}
	// Parsing alone should not mark the content as downloaded.
	if url.IsContentDownloaded {
		t.Errorf("Expected it not to be downloaded, but it was")
	}
}
// TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail loads the
// url_card_with_player_placeholder_image.json fixture, unmarshals it into a
// scraper.APICard, runs it through scraper.ParseAPIUrlCard, and checks the
// domain, title, description, site ID, and flags of the result. The expected
// thumbnail is the player-placeholder.png image, which is asserted to still
// count as a thumbnail (HasThumbnail is true).
func TestParseAPIUrlCardWithPlayerAndPlaceholderThumbnail(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_with_player_placeholder_image.json")
	if err != nil {
		// Fail this test only; a panic would abort the entire test binary.
		t.Fatalf("reading test fixture: %v", err)
	}

	var apiCard scraper.APICard
	if err := json.Unmarshal(data, &apiCard); err != nil {
		t.Fatalf("unmarshalling test fixture: %v", err)
	}

	url := scraper.ParseAPIUrlCard(apiCard)

	wantDomain := "www.youtube.com"
	if url.Domain != wantDomain {
		t.Errorf("Expected %q, got %q", wantDomain, url.Domain)
	}

	wantTitle := "Did Michael Malice Turn Me into an Anarchist? | Ep 181"
	if url.Title != wantTitle {
		t.Errorf("Expected %q, got %q", wantTitle, url.Title)
	}

	wantDescription := "SUBSCRIBE TO THE NEW SHOW W/ ELIJAH & SYDNEY: \"YOU ARE HERE\"YT: https://www.youtube.com/youareheredaily______________________________________________________..."
	if url.Description != wantDescription {
		t.Errorf("Expected %q, got %q", wantDescription, url.Description)
	}

	wantRemoteURL := "https://pbs.twimg.com/cards/player-placeholder.png"
	if url.ThumbnailRemoteUrl != wantRemoteURL {
		t.Errorf("Expected %q, got %q", wantRemoteURL, url.ThumbnailRemoteUrl)
	}

	wantLocalFilename := "player-placeholder.png"
	if url.ThumbnailLocalPath != wantLocalFilename {
		t.Errorf("Expected %q, got %q", wantLocalFilename, url.ThumbnailLocalPath)
	}

	wantSiteID := scraper.UserID(10228272)
	if url.SiteID != wantSiteID {
		t.Errorf("Expected %d, got %d", wantSiteID, url.SiteID)
	}

	if !url.HasThumbnail {
		t.Errorf("Should have a thumbnail, but it doesn't")
	}
	// Parsing alone should not mark the content as downloaded.
	if url.IsContentDownloaded {
		t.Errorf("Expected it not to be downloaded, but it was")
	}
}
// TestParseAPIUrlCardWithoutThumbnail loads the
// url_card_without_thumbnail.json fixture, unmarshals it into a
// scraper.APICard, runs it through scraper.ParseAPIUrlCard, and checks the
// domain, title, and (empty) description of the result, that HasCard is set,
// and that HasThumbnail is not.
func TestParseAPIUrlCardWithoutThumbnail(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/url_card_without_thumbnail.json")
	if err != nil {
		// Fail this test only; a panic would abort the entire test binary.
		t.Fatalf("reading test fixture: %v", err)
	}

	var apiCard scraper.APICard
	if err := json.Unmarshal(data, &apiCard); err != nil {
		t.Fatalf("unmarshalling test fixture: %v", err)
	}

	url := scraper.ParseAPIUrlCard(apiCard)

	wantDomain := "en.m.wikipedia.org"
	if url.Domain != wantDomain {
		t.Errorf("Expected %q, got %q", wantDomain, url.Domain)
	}

	wantTitle := "Entryism - Wikipedia"
	if url.Title != wantTitle {
		t.Errorf("Expected %q, got %q", wantTitle, url.Title)
	}

	// This fixture is expected to yield an empty description.
	wantDescription := ""
	if url.Description != wantDescription {
		t.Errorf("Expected %q, got %q", wantDescription, url.Description)
	}

	// The card itself must still be detected even though it has no thumbnail.
	if !url.HasCard {
		t.Errorf("Expected it to have a card, but it didn't")
	}
	if url.HasThumbnail {
		t.Errorf("Should have no thumbnail, but it does")
	}
}