Add parsing of tombstones in immediate parent replies in Tweet Detail views for APIv2
This commit is contained in:
parent
21581b325a
commit
1226e3947e
@ -2,6 +2,7 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
@ -9,6 +10,8 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ErrorIsTombstone = errors.New("tweet is a tombstone")
|
||||||
|
|
||||||
type CardValue struct {
|
type CardValue struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
StringValue string `json:"string_value"`
|
StringValue string `json:"string_value"`
|
||||||
@ -169,13 +172,14 @@ type APIV2Result struct {
|
|||||||
} `json:"result"`
|
} `json:"result"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
func (api_result APIV2Result) ToTweetTrove() (TweetTrove, error) {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
// Start by checking if this is a null entry in a feed
|
// Start by checking if this is a null entry in a feed
|
||||||
if api_result.Result.Tombstone != nil && ignore_null_entries {
|
if api_result.Result.Tombstone != nil {
|
||||||
// TODO: this is becoming really spaghetti. Why do we need a separate execution path for this?
|
// Returning an error indicates the parent (APIV2Entry) has to parse it as a tombstone.
|
||||||
return ret
|
// The tweet ID isn't available to the APIV2Result, but it is to the APIV2Entry.
|
||||||
|
return ret, ErrorIsTombstone
|
||||||
}
|
}
|
||||||
|
|
||||||
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
|
if api_result.Result.Legacy.ID == 0 && api_result.Result.Tweet.Legacy.ID != 0 {
|
||||||
@ -207,25 +211,42 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
|
|||||||
// Handle quoted tweet
|
// Handle quoted tweet
|
||||||
if api_result.Result.QuotedStatusResult != nil {
|
if api_result.Result.QuotedStatusResult != nil {
|
||||||
quoted_api_result := api_result.Result.QuotedStatusResult
|
quoted_api_result := api_result.Result.QuotedStatusResult
|
||||||
|
quoted_trove, err := quoted_api_result.ToTweetTrove()
|
||||||
|
|
||||||
// Quoted tweets might be tombstones!
|
// Quoted tombstones can be handled here since we already have the ID and user handle
|
||||||
if quoted_api_result.Result.Tombstone != nil {
|
if errors.Is(err, ErrorIsTombstone) {
|
||||||
tombstoned_tweet := "ed_api_result.Result.Legacy.APITweet
|
tombstoned_tweet := quoted_api_result.Result.Legacy.APITweet
|
||||||
var ok bool
|
|
||||||
tombstoned_tweet.TombstoneText, ok = tombstone_types[quoted_api_result.Result.Tombstone.Text.Text]
|
// Capture the tombstone text
|
||||||
if !ok {
|
var is_ok bool
|
||||||
|
tombstoned_tweet.TombstoneText, is_ok = tombstone_types[quoted_api_result.Result.Tombstone.Text.Text]
|
||||||
|
if !is_ok {
|
||||||
panic(fmt.Errorf("Unknown tombstone text %q:\n %w", quoted_api_result.Result.Tombstone.Text.Text, EXTERNAL_API_ERROR))
|
panic(fmt.Errorf("Unknown tombstone text %q:\n %w", quoted_api_result.Result.Tombstone.Text.Text, EXTERNAL_API_ERROR))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Capture the tombstone ID
|
||||||
tombstoned_tweet.ID = int64(int_or_panic(api_result.Result.Legacy.APITweet.QuotedStatusIDStr))
|
tombstoned_tweet.ID = int64(int_or_panic(api_result.Result.Legacy.APITweet.QuotedStatusIDStr))
|
||||||
|
|
||||||
|
// Capture the tombstone's user handle
|
||||||
handle, err := ParseHandleFromTweetUrl(api_result.Result.Legacy.APITweet.QuotedStatusPermalink.ExpandedURL)
|
handle, err := ParseHandleFromTweetUrl(api_result.Result.Legacy.APITweet.QuotedStatusPermalink.ExpandedURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
tombstoned_tweet.UserHandle = string(handle)
|
tombstoned_tweet.UserHandle = string(handle)
|
||||||
|
|
||||||
|
// Parse the tombstone into a Tweet and add it to the trove
|
||||||
|
parsed_tombstone_tweet, err := ParseSingleTweet(tombstoned_tweet)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
ret.Tweets[parsed_tombstone_tweet.ID] = parsed_tombstone_tweet
|
||||||
|
|
||||||
|
// Add the user as a tombstoned user to be fetched later
|
||||||
ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
|
ret.TombstoneUsers = append(ret.TombstoneUsers, handle)
|
||||||
|
} else if err != nil {
|
||||||
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
quoted_trove := quoted_api_result.ToTweetTrove(false)
|
|
||||||
ret.MergeWith(quoted_trove)
|
ret.MergeWith(quoted_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,8 +256,8 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
|
|||||||
if api_result.Result.Legacy.RetweetedStatusResult == nil {
|
if api_result.Result.Legacy.RetweetedStatusResult == nil {
|
||||||
// We have to filter out retweets. For some reason, retweets have a copy of the card in both the retweeting
|
// We have to filter out retweets. For some reason, retweets have a copy of the card in both the retweeting
|
||||||
// and the retweeted TweetResults; it should only be parsed for the real Tweet, not the Retweet
|
// and the retweeted TweetResults; it should only be parsed for the real Tweet, not the Retweet
|
||||||
main_tweet, ok := ret.Tweets[TweetID(api_result.Result.Legacy.ID)]
|
main_tweet, is_ok := ret.Tweets[TweetID(api_result.Result.Legacy.ID)]
|
||||||
if !ok {
|
if !is_ok {
|
||||||
panic(fmt.Errorf("Tweet trove didn't contain its own tweet with ID %d:\n %w", api_result.Result.Legacy.ID, EXTERNAL_API_ERROR))
|
panic(fmt.Errorf("Tweet trove didn't contain its own tweet with ID %d:\n %w", api_result.Result.Legacy.ID, EXTERNAL_API_ERROR))
|
||||||
}
|
}
|
||||||
if api_result.Result.Card.Legacy.Name == "summary_large_image" || api_result.Result.Card.Legacy.Name == "player" {
|
if api_result.Result.Card.Legacy.Name == "summary_large_image" || api_result.Result.Card.Legacy.Name == "player" {
|
||||||
@ -284,7 +305,7 @@ func (api_result APIV2Result) ToTweetTrove(ignore_null_entries bool) TweetTrove
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIV2Tweet struct {
|
type APIV2Tweet struct {
|
||||||
@ -299,11 +320,13 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
|||||||
|
|
||||||
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
|
// If there's a retweet, we ignore the main tweet except for posted_at and retweeting UserID
|
||||||
if api_v2_tweet.RetweetedStatusResult != nil {
|
if api_v2_tweet.RetweetedStatusResult != nil {
|
||||||
orig_tweet_trove := api_v2_tweet.RetweetedStatusResult.ToTweetTrove(false)
|
orig_tweet_trove, err := api_v2_tweet.RetweetedStatusResult.ToTweetTrove()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
ret.MergeWith(orig_tweet_trove)
|
ret.MergeWith(orig_tweet_trove)
|
||||||
|
|
||||||
retweet := Retweet{}
|
retweet := Retweet{}
|
||||||
var err error
|
|
||||||
|
|
||||||
retweet.RetweetID = TweetID(api_v2_tweet.ID)
|
retweet.RetweetID = TweetID(api_v2_tweet.ID)
|
||||||
if api_v2_tweet.RetweetedStatusResult.Result.Legacy.ID == 0 && api_v2_tweet.RetweetedStatusResult.Result.Tweet.Legacy.ID != 0 {
|
if api_v2_tweet.RetweetedStatusResult.Result.Legacy.ID == 0 && api_v2_tweet.RetweetedStatusResult.Result.Tweet.Legacy.ID != 0 {
|
||||||
@ -375,7 +398,7 @@ func (e *APIV2Entry) UnmarshalJSON(data []byte) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
func (e APIV2Entry) ToTweetTrove() TweetTrove {
|
||||||
defer func() {
|
defer func() {
|
||||||
if obj := recover(); obj != nil {
|
if obj := recover(); obj != nil {
|
||||||
log.Warn(fmt.Sprintf("Panic while decoding entry: %s\n", e.OriginalJSON))
|
log.Warn(fmt.Sprintf("Panic while decoding entry: %s\n", e.OriginalJSON))
|
||||||
@ -400,7 +423,11 @@ func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
|||||||
// "Show More" replies button in a thread on Tweet Detail page
|
// "Show More" replies button in a thread on Tweet Detail page
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ret.MergeWith(item.Item.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries))
|
trove, err := item.Item.ItemContent.TweetResults.ToTweetTrove()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
ret.MergeWith(trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
case "whoToFollow", "TopicsModule", "tweetdetailrelatedtweets":
|
case "whoToFollow", "TopicsModule", "tweetdetailrelatedtweets":
|
||||||
@ -414,7 +441,35 @@ func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
} else if e.Content.EntryType == "TimelineTimelineItem" {
|
} else if e.Content.EntryType == "TimelineTimelineItem" {
|
||||||
return e.Content.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries)
|
ret, err := e.Content.ItemContent.TweetResults.ToTweetTrove()
|
||||||
|
|
||||||
|
if errors.Is(err, ErrorIsTombstone) {
|
||||||
|
// Handle tombstones
|
||||||
|
ret = NewTweetTrove() // clear the result just in case
|
||||||
|
tombstoned_tweet := e.Content.ItemContent.TweetResults.Result.Legacy.APITweet // Will be empty to start
|
||||||
|
|
||||||
|
// Capture the tombstone text
|
||||||
|
var is_ok bool
|
||||||
|
tombstoned_tweet.TombstoneText, is_ok = tombstone_types[e.Content.ItemContent.TweetResults.Result.Tombstone.Text.Text]
|
||||||
|
if !is_ok {
|
||||||
|
panic(fmt.Errorf(
|
||||||
|
"Unknown tombstone text %q:\n %w",
|
||||||
|
e.Content.ItemContent.TweetResults.Result.Tombstone.Text.Text,
|
||||||
|
EXTERNAL_API_ERROR,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capture the tombstone ID
|
||||||
|
tombstoned_tweet.ID = int64(int_or_panic(strings.Split(e.EntryID, "-")[1]))
|
||||||
|
|
||||||
|
// Parse the tombstone into a Tweet and add it to the trove
|
||||||
|
parsed_tombstone_tweet, err := ParseSingleTweet(tombstoned_tweet)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
ret.Tweets[parsed_tombstone_tweet.ID] = parsed_tombstone_tweet
|
||||||
|
}
|
||||||
|
return ret
|
||||||
}
|
}
|
||||||
panic("Unknown EntryType: " + e.Content.EntryType)
|
panic("Unknown EntryType: " + e.Content.EntryType)
|
||||||
}
|
}
|
||||||
@ -489,11 +544,58 @@ func (api_response APIV2Response) IsEmpty() bool {
|
|||||||
*/
|
*/
|
||||||
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
|
// Parse all of the entries
|
||||||
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
for _, entry := range api_response.GetMainInstruction().Entries { // TODO: the second Instruction is the pinned tweet
|
||||||
main_trove := entry.ToTweetTrove(true)
|
main_trove := entry.ToTweetTrove()
|
||||||
ret.MergeWith(main_trove)
|
ret.MergeWith(main_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add in any tombstoned user handles and IDs if possible, by reading from the replies
|
||||||
|
for _, tweet := range ret.Tweets {
|
||||||
|
// Skip if it's not a reply (nothing to add)
|
||||||
|
if tweet.InReplyToID == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if the replied tweet isn't in the result set (e.g., the reply is a quoted tweet)
|
||||||
|
replied_tweet, is_ok := ret.Tweets[tweet.InReplyToID]
|
||||||
|
if !is_ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if the replied tweet isn't a stub (it's already filled out)
|
||||||
|
if !replied_tweet.IsStub {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if replied_tweet.ID == 0 {
|
||||||
|
// Not sure if this can happen. Use a panic to detect if it does so we can analyse
|
||||||
|
// TODO: make a better system to capture "discovery panics" that doesn't involve panicking
|
||||||
|
panic(fmt.Sprintf("Tombstoned tweet has no ID (should be %d)", tweet.InReplyToID))
|
||||||
|
}
|
||||||
|
|
||||||
|
if replied_tweet.UserID == 0 {
|
||||||
|
replied_tweet.UserID = tweet.in_reply_to_user_id
|
||||||
|
if replied_tweet.UserID == 0 { // Still??
|
||||||
|
log.Warn(fmt.Sprintf("Still couldn't find user for replied tweet %d", tweet.InReplyToID))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} // replied_tweet.UserID should now be a real UserID
|
||||||
|
|
||||||
|
existing_user, is_ok := ret.Users[replied_tweet.UserID]
|
||||||
|
if !is_ok {
|
||||||
|
existing_user = User{ID: replied_tweet.UserID}
|
||||||
|
}
|
||||||
|
if existing_user.Handle == "" {
|
||||||
|
existing_user.Handle = tweet.in_reply_to_user_handle
|
||||||
|
}
|
||||||
|
ret.Users[replied_tweet.UserID] = existing_user
|
||||||
|
// TODO: add to ret.TombstonedUsers?
|
||||||
|
|
||||||
|
ret.Tweets[replied_tweet.ID] = replied_tweet
|
||||||
|
}
|
||||||
|
|
||||||
return ret, nil // TODO: This doesn't need to return an error, it's always nil
|
return ret, nil // TODO: This doesn't need to return an error, it's always nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +63,8 @@ func TestAPIV2ParseTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485708879174508550]
|
tweet, ok := trove.Tweets[1485708879174508550]
|
||||||
@ -111,7 +112,8 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
// Should be 2 tweets: quote-tweet and quoted-tweet
|
// Should be 2 tweets: quote-tweet and quoted-tweet
|
||||||
assert.Equal(2, len(trove.Tweets))
|
assert.Equal(2, len(trove.Tweets))
|
||||||
@ -165,7 +167,8 @@ func TestAPIV2ParseRetweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
// Should only be 1 tweet, the retweeted one
|
// Should only be 1 tweet, the retweeted one
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
@ -224,7 +227,8 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
// Quoted tweet and quoting tweet
|
// Quoted tweet and quoting tweet
|
||||||
assert.Equal(2, len(trove.Tweets))
|
assert.Equal(2, len(trove.Tweets))
|
||||||
@ -283,7 +287,8 @@ func TestAPIV2ParseTweetWithQuotedTombstone(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Users))
|
assert.Equal(1, len(trove.Users))
|
||||||
user, ok := trove.Users[44067298]
|
user, ok := trove.Users[44067298]
|
||||||
@ -318,7 +323,8 @@ func TestAPIV2ParseTweetWithURL(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485695695025803264]
|
tweet, ok := trove.Tweets[1485695695025803264]
|
||||||
@ -355,7 +361,8 @@ func TestAPIV2ParseTweetWithURLPlayerCard(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1485504913614327808]
|
tweet, ok := trove.Tweets[1485504913614327808]
|
||||||
@ -387,7 +394,8 @@ func TestAPIV2ParseTweetWithURLRetweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Equal(1, len(trove.Tweets))
|
assert.Equal(1, len(trove.Tweets))
|
||||||
tweet, ok := trove.Tweets[1488605073588559873]
|
tweet, ok := trove.Tweets[1488605073588559873]
|
||||||
@ -414,7 +422,8 @@ func TestAPIV2ParseTweetWithPoll(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Len(trove.Tweets, 1)
|
assert.Len(trove.Tweets, 1)
|
||||||
tweet, ok := trove.Tweets[1485692111106285571]
|
tweet, ok := trove.Tweets[1485692111106285571]
|
||||||
@ -454,7 +463,8 @@ func TestAPIV2ParseTweetWithSpace(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Len(trove.Tweets, 1)
|
assert.Len(trove.Tweets, 1)
|
||||||
tweet, ok := trove.Tweets[1497647006445146113]
|
tweet, ok := trove.Tweets[1497647006445146113]
|
||||||
@ -580,26 +590,68 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
|
|||||||
assert.Equal(feed.GetMainInstruction().Entries[41].EntryID, "asdf")
|
assert.Equal(feed.GetMainInstruction().Entries[41].EntryID, "asdf")
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Should handle an entry in the feed that's a tombstone by just ignoring it
|
|
||||||
* Expectation: random tombstones in the feed with no context should parse as empty TweetTroves.
|
|
||||||
*
|
|
||||||
* The indication that it's from a feed (i.e., not in a comments thread) is 'ToTweetTrove(true)'.
|
|
||||||
* On a reply thread, it would be 'ToTweetTrove(false)'.
|
|
||||||
*/
|
|
||||||
func TestAPIV2TombstoneEntry(t *testing.T) {
|
func TestAPIV2TombstoneEntry(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
data, err := os.ReadFile("test_responses/api_v2/tombstone_tweet.json")
|
data, err := os.ReadFile("test_responses/api_v2/tombstone_tweet.json")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
var tweet_result APIV2Result
|
var entry APIV2Entry
|
||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &entry)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true) // 'true' indicates to ignore empty entries
|
trove := entry.ToTweetTrove()
|
||||||
assert.Len(trove.Tweets, 0)
|
assert.NoError(err)
|
||||||
|
assert.Len(trove.Tweets, 1)
|
||||||
assert.Len(trove.Users, 0)
|
assert.Len(trove.Users, 0)
|
||||||
assert.Len(trove.Retweets, 0)
|
assert.Len(trove.Retweets, 0)
|
||||||
|
|
||||||
|
tweet, is_ok := trove.Tweets[1454515503242829830]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(tweet.ID, TweetID(1454515503242829830))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAPIV2ConversationThreadWithTombstones(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/api_v2/conversation_thread_with_tombstones.json")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var resp APIV2Response
|
||||||
|
err = json.Unmarshal(data, &resp)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
trove, err := resp.ToTweetTrove()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Len(trove.Tweets, 4)
|
||||||
|
// t1, is_ok := trove.Tweets[1454515503242829830]
|
||||||
|
// assert.True(is_ok)
|
||||||
|
// assert.True(t1.IsStub)
|
||||||
|
// assert.Equal(TweetID(0), t1.InReplyToID)
|
||||||
|
// // TODO: assert associated user is fake
|
||||||
|
|
||||||
|
// t2, is_ok := trove.Tweets[1454521424144654344]
|
||||||
|
// assert.True(is_ok)
|
||||||
|
// assert.True(t2.IsStub)
|
||||||
|
// assert.Equal(TweetID(1454515503242829830), t2.InReplyToID)
|
||||||
|
|
||||||
|
t3, is_ok := trove.Tweets[1454522147750260742]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.True(t3.IsStub)
|
||||||
|
// assert.Equal(TweetID(1454521424144654344), t3.InReplyToID)
|
||||||
|
assert.Equal(UserID(1365863538393309184), t3.UserID)
|
||||||
|
t3_user, is_ok := trove.Users[t3.UserID]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(UserHandle("itsbackwereover"), t3_user.Handle)
|
||||||
|
|
||||||
|
t4, is_ok := trove.Tweets[1454526270809726977]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.False(t4.IsStub)
|
||||||
|
assert.Equal(TweetID(1454522147750260742), t4.InReplyToID)
|
||||||
|
_, is_ok = trove.Users[t4.UserID]
|
||||||
|
assert.True(is_ok)
|
||||||
|
|
||||||
|
// assert.Len(trove.Users, 4)
|
||||||
|
assert.Len(trove.Retweets, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTweetWithWarning(t *testing.T) {
|
func TestTweetWithWarning(t *testing.T) {
|
||||||
@ -610,7 +662,8 @@ func TestTweetWithWarning(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Len(trove.Retweets, 1)
|
assert.Len(trove.Retweets, 1)
|
||||||
assert.Len(trove.Tweets, 2)
|
assert.Len(trove.Tweets, 2)
|
||||||
@ -626,7 +679,8 @@ func TestRetweetWithVisibilityResults(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
assert.Len(trove.Retweets, 1)
|
assert.Len(trove.Retweets, 1)
|
||||||
assert.Len(trove.Tweets, 1)
|
assert.Len(trove.Tweets, 1)
|
||||||
@ -646,7 +700,8 @@ func TestExpandableTweet(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &tweet_result)
|
err = json.Unmarshal(data, &tweet_result)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
trove := tweet_result.ToTweetTrove(true)
|
trove, err := tweet_result.ToTweetTrove()
|
||||||
|
assert.NoError(err)
|
||||||
main_tweet, is_ok := trove.Tweets[TweetID(1649600354747572225)]
|
main_tweet, is_ok := trove.Tweets[TweetID(1649600354747572225)]
|
||||||
require.True(is_ok)
|
require.True(is_ok)
|
||||||
|
|
||||||
@ -665,7 +720,7 @@ func TestEntryWithConversationThread(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &entry_result)
|
err = json.Unmarshal(data, &entry_result)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
trove := entry_result.ToTweetTrove(true)
|
trove := entry_result.ToTweetTrove()
|
||||||
|
|
||||||
assert.Len(trove.Tweets, 4) // 3 tweets in the thread plus the quoted tweet
|
assert.Len(trove.Tweets, 4) // 3 tweets in the thread plus the quoted tweet
|
||||||
t1, is_ok := trove.Tweets[1624966566264680448]
|
t1, is_ok := trove.Tweets[1624966566264680448]
|
||||||
@ -693,7 +748,7 @@ func TestConversationThreadEntryWithShowMoreButton(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &entry_result)
|
err = json.Unmarshal(data, &entry_result)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
trove := entry_result.ToTweetTrove(true)
|
trove := entry_result.ToTweetTrove()
|
||||||
|
|
||||||
assert.Len(trove.Tweets, 1)
|
assert.Len(trove.Tweets, 1)
|
||||||
t1, is_ok := trove.Tweets[1649803385485377536]
|
t1, is_ok := trove.Tweets[1649803385485377536]
|
||||||
|
@ -1 +1 @@
|
|||||||
{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"This Tweet was deleted by the Tweet author. Learn more","entities":[{"fromIndex":44,"toIndex":54,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}}
|
{"entryId":"tweet-1454515503242829830","sortIndex":"7768856533611945977","content":{"entryType":"TimelineTimelineItem","__typename":"TimelineTimelineItem","itemContent":{"itemType":"TimelineTweet","__typename":"TimelineTweet","tweet_results":{"result":{"__typename":"TweetTombstone","tombstone":{"__typename":"TextTombstone","text":{"rtl":false,"text":"You’re unable to view this Tweet because this account owner limits who can view their Tweets. Learn more","entities":[{"fromIndex":94,"toIndex":104,"ref":{"type":"TimelineUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter","urlType":"ExternalUrl"}}]}}}},"tweetDisplayType":"Tweet","hasModeratedReplies":false}}}
|
||||||
|
@ -33,9 +33,8 @@ func (l CommaSeparatedList) Value() (driver.Value, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Tweet struct {
|
type Tweet struct {
|
||||||
ID TweetID `db:"id"`
|
ID TweetID `db:"id"`
|
||||||
UserID UserID `db:"user_id"`
|
UserID UserID `db:"user_id"`
|
||||||
UserHandle UserHandle // For processing tombstones
|
|
||||||
User *User
|
User *User
|
||||||
Text string `db:"text"`
|
Text string `db:"text"`
|
||||||
IsExpandable bool `db:"is_expandable"`
|
IsExpandable bool `db:"is_expandable"`
|
||||||
@ -47,6 +46,11 @@ type Tweet struct {
|
|||||||
InReplyToID TweetID `db:"in_reply_to_id"`
|
InReplyToID TweetID `db:"in_reply_to_id"`
|
||||||
QuotedTweetID TweetID `db:"quoted_tweet_id"`
|
QuotedTweetID TweetID `db:"quoted_tweet_id"`
|
||||||
|
|
||||||
|
// For processing tombstones
|
||||||
|
UserHandle UserHandle
|
||||||
|
in_reply_to_user_handle UserHandle
|
||||||
|
in_reply_to_user_id UserID
|
||||||
|
|
||||||
Images []Image
|
Images []Image
|
||||||
Videos []Video
|
Videos []Video
|
||||||
Urls []Url
|
Urls []Url
|
||||||
@ -221,6 +225,10 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|||||||
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
||||||
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||||
|
|
||||||
|
// Extra data that can help piece together tombstoned tweet info
|
||||||
|
ret.in_reply_to_user_id = UserID(apiTweet.InReplyToUserID)
|
||||||
|
ret.in_reply_to_user_handle = UserHandle(apiTweet.InReplyToScreenName)
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user