Add link parsing for APIv2

This commit is contained in:
Alessio 2022-02-01 16:56:37 -08:00
parent 1bc6aec3b5
commit 9eb3e42539
5 changed files with 161 additions and 2 deletions

View File

@ -163,7 +163,7 @@ type APITweet struct {
QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string QuotedStatusIDStr string `json:"quoted_status_id_str"` // Can be empty string
QuotedStatusID int64 QuotedStatusID int64
QuotedStatusPermalink struct { QuotedStatusPermalink struct {
URL string `json:"url"` ShortURL string `json:"url"`
ExpandedURL string `json:"expanded"` ExpandedURL string `json:"expanded"`
} `json:"quoted_status_permalink"` } `json:"quoted_status_permalink"`
Time time.Time `json:"time"` Time time.Time `json:"time"`
@ -188,10 +188,20 @@ func (t *APITweet) NormalizeContent() {
t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]]) t.FullText = string([]rune(t.FullText)[t.DisplayTextRange[0]:t.DisplayTextRange[1]])
} }
// Handle short links showing up at ends of tweets
for _, url := range t.Entities.URLs {
index := strings.Index(t.FullText, url.ShortenedUrl)
if index == (len(t.FullText) - len(url.ShortenedUrl)) {
t.FullText = strings.TrimSpace(t.FullText[0:index])
}
}
// Handle pasted tweet links that turn into quote tweets but still have a link in them // Handle pasted tweet links that turn into quote tweets but still have a link in them
// This is a separate case from above because we want it gone even if it's in the middle of the tweet
if t.QuotedStatusID != 0 { if t.QuotedStatusID != 0 {
for _, url := range t.Entities.URLs { for _, url := range t.Entities.URLs {
if url.ShortenedUrl == t.QuotedStatusPermalink.URL { if url.ShortenedUrl == t.QuotedStatusPermalink.ShortURL {
t.FullText = strings.ReplaceAll(t.FullText, url.ShortenedUrl, "") t.FullText = strings.ReplaceAll(t.FullText, url.ShortenedUrl, "")
} }
} }

View File

@ -10,6 +10,67 @@ import (
"strings" "strings"
) )
// CardValue is the `value` half of one entry in a card's `binding_values` list
// (see APIV2Card.Legacy.BindingValues).
// NOTE(review): presumably only the member matching `Type` is populated for any
// given value and the rest stay at their zero values -- confirm against real API responses.
type CardValue struct {
// Raw `type` string from the JSON; the set of possible values is not visible here.
Type string `json:"type"`
StringValue string `json:"string_value"`
// Image payload: alt text, pixel dimensions and remote URL.
ImageValue struct {
AltText string `json:"alt"`
Height int `json:"height"`
Width int `json:"width"`
Url string `json:"url"`
} `json:"image_value"`
UserValue struct {
// Decoded from the `id_str` key; the `,string` option parses a JSON-string-encoded number into the int64.
ID int64 `json:"id_str,string"`
} `json:"user_value"`
BooleanValue bool `json:"boolean_value"`
}
// APIV2Card mirrors the JSON shape of a card object; everything of interest
// lives under the `legacy` key.
type APIV2Card struct {
Legacy struct {
// Key/value pairs describing the card; ParseAsUrl indexes these by Key
// (it reads "domain", "title", "description", "creator", "site" and the image keys).
BindingValues []struct {
Key string `json:"key"`
Value CardValue `json:"value"`
} `json:"binding_values"`
// Card type name; ParseAsUrl handles "summary", "summary_large_image" and "player".
Name string `json:"name"`
// ParseAsUrl copies this into Url.ShortText.
Url string `json:"url"`
} `json:"legacy"`
}
// ParseAsUrl converts the card into a Url with HasCard set.
//
// The card's binding values are indexed by key and copied into the matching
// Url fields; keys that are absent leave the corresponding field at its zero
// value.  Url.Text (the expanded URL) is not set here, since the card only
// carries `Legacy.Url`, which is stored as ShortText.
//
// The thumbnail is taken from a binding that depends on the card type:
//   - "summary" and "summary_large_image" use "thumbnail_image_large"
//   - "player" uses "player_image_large"
//
// The thumbnail URL, width and height are all read from that same binding, so
// the stored dimensions always describe the image the URL points to.
//
// Panics if the card type (`Legacy.Name`) is not one of the names above.
func (card APIV2Card) ParseAsUrl() Url {
	values := make(map[string]CardValue, len(card.Legacy.BindingValues))
	for _, obj := range card.Legacy.BindingValues {
		values[obj.Key] = obj.Value
	}

	ret := Url{}
	ret.HasCard = true
	ret.ShortText = card.Legacy.Url
	ret.Domain = values["domain"].StringValue
	ret.Title = values["title"].StringValue
	ret.Description = values["description"].StringValue
	ret.IsContentDownloaded = false
	ret.CreatorID = UserID(values["creator"].UserValue.ID)
	ret.SiteID = UserID(values["site"].UserValue.ID)

	// Select the binding holding the thumbnail for this card type
	var thumbnail CardValue
	switch card.Legacy.Name {
	case "summary_large_image", "summary":
		thumbnail = values["thumbnail_image_large"]
	case "player":
		thumbnail = values["player_image_large"]
	default:
		panic("TODO unknown card type")
	}

	// An empty URL means the binding was missing; leave the thumbnail fields unset
	if thumbnail.ImageValue.Url != "" {
		ret.HasThumbnail = true
		ret.ThumbnailRemoteUrl = thumbnail.ImageValue.Url
		ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail.ImageValue.Url)
		ret.ThumbnailWidth = thumbnail.ImageValue.Width
		ret.ThumbnailHeight = thumbnail.ImageValue.Height
	}

	return ret
}
type APIV2UserResult struct { type APIV2UserResult struct {
UserResults struct { UserResults struct {
Result struct { Result struct {
@ -37,6 +98,7 @@ type APIV2Result struct {
} `json:"text"` } `json:"text"`
} `json:"tombstone"` } `json:"tombstone"`
Core *APIV2UserResult `json:"core"` Core *APIV2UserResult `json:"core"`
Card APIV2Card `json:"card"`
QuotedStatusResult *APIV2Result `json:"quoted_status_result"` QuotedStatusResult *APIV2Result `json:"quoted_status_result"`
} `json:"result"` } `json:"result"`
} }
@ -74,6 +136,25 @@ func (api_result APIV2Result) ToTweetTrove() TweetTrove {
ret.MergeWith(quoted_trove) ret.MergeWith(quoted_trove)
} }
// Handle URL cards
if api_result.Result.Card.Legacy.Name == "summary_large_image" || api_result.Result.Card.Legacy.Name == "player" {
url := api_result.Result.Card.ParseAsUrl()
main_tweet := ret.Tweets[TweetID(api_result.Result.Legacy.ID)]
found := false
for i := range main_tweet.Urls {
if main_tweet.Urls[i].ShortText != url.ShortText {
continue
}
found = true
url.Text = main_tweet.Urls[i].Text // Copy the expanded URL over, since the card doesn't have it in the new API
main_tweet.Urls[i] = url
}
if !found {
panic("Tweet trove doesn't contain its own main tweet")
}
}
return ret return ret
} }

View File

@ -268,6 +268,72 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
} }
/**
 * Parse a tweet with a link.
 *
 * Loads a saved APIv2 response for a tweet with a URL card, converts it to a
 * TweetTrove, and checks that the card's fields were merged into the tweet's
 * single Url entry.
 */
func TestAPIV2ParseTweetWithURL(t *testing.T) {
	assert := assert.New(t)
	data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_url.json")
	if err != nil {
		panic(err)
	}
	var tweet_result APIV2Result
	err = json.Unmarshal(data, &tweet_result)
	if !assert.NoError(err) {
		return // Nothing meaningful to check if the fixture didn't parse
	}

	trove := tweet_result.ToTweetTrove()

	assert.Equal(1, len(trove.Tweets))
	tweet, ok := trove.Tweets[1485695695025803264]
	if !assert.True(ok) {
		return // The remaining checks would only report noise about a zero-value tweet
	}
	assert.Equal("This led to what I discussed as \"anguish signaling,\" where progs competed in proclaiming their distress both to show they were the Good Guys but also to get the pack to regroup, akin to wolves howling.", tweet.Text)

	// Stop here on failure: indexing Urls[0] on an empty slice would panic and
	// abort the whole test binary instead of reporting this assertion
	if !assert.Equal(1, len(tweet.Urls)) {
		return
	}
	url := tweet.Urls[0]
	assert.Equal("observer.com", url.Domain)
	assert.Equal("Why Evangelical Progressives Need to Demonstrate Anguish Publicly", url.Title)
	assert.Equal("https://observer.com/2016/12/why-evangelical-progressives-need-to-demonstrate-anguish-publicly/", url.Text)
	assert.Equal("The concept of “virtue signaling” gained a great deal of currency in this past year. Its a way to demonstrate to others that one is a good person without having to do anything", url.Description)
	assert.Equal("https://pbs.twimg.com/card_img/1485694664640507911/WsproWyP?format=jpg&name=600x600", url.ThumbnailRemoteUrl)
	assert.Equal(600, url.ThumbnailWidth)
	assert.Equal(300, url.ThumbnailHeight)
	assert.Equal(UserID(15738599), url.SiteID)
	assert.Equal(UserID(15738599), url.CreatorID)
}
/**
 * Parse a tweet with a link with a "player" card.
 *
 * Loads a saved APIv2 response for a tweet whose URL card is of type "player",
 * converts it to a TweetTrove, and checks the resulting Url entry.
 */
func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) {
	assert := assert.New(t)
	data, err := ioutil.ReadFile("test_responses/api_v2/tweet_with_url_player_card.json")
	if err != nil {
		panic(err)
	}
	var tweet_result APIV2Result
	err = json.Unmarshal(data, &tweet_result)
	if !assert.NoError(err) {
		return // Nothing meaningful to check if the fixture didn't parse
	}

	trove := tweet_result.ToTweetTrove()

	assert.Equal(1, len(trove.Tweets))
	tweet, ok := trove.Tweets[1485504913614327808]
	if !assert.True(ok) {
		return // The remaining checks would only report noise about a zero-value tweet
	}
	assert.Equal("i'll just leave this here", tweet.Text)

	// Stop here on failure: indexing Urls[0] on an empty slice would panic and
	// abort the whole test binary instead of reporting this assertion
	if !assert.Equal(1, len(tweet.Urls)) {
		return
	}
	url := tweet.Urls[0]
	assert.Equal("www.youtube.com", url.Domain)
	assert.Equal("Michael Malice on Kennedy Nov. 15, 2016", url.Title)
	assert.Equal("https://www.youtube.com/watch?v=c9TypEM1ik4&t=9s", url.Text)
	assert.Equal("Steve Bannon;", url.Description)
	assert.Equal("https://pbs.twimg.com/card_img/1485504774233415680/fsbK59th?format=jpg&name=800x320_1", url.ThumbnailRemoteUrl)
	assert.Equal(UserID(10228272), url.SiteID)
}
func TestParseAPIV2UserFeed(t *testing.T) { func TestParseAPIV2UserFeed(t *testing.T) {
data, err := ioutil.ReadFile("test_responses/api_v2/user_feed_apiv2.json") data, err := ioutil.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
if err != nil { if err != nil {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long