From 0293f11b9be94063c5a1afd8cef38f8fcbab264f Mon Sep 17 00:00:00 2001 From: Alessio Date: Thu, 24 Nov 2022 18:57:42 -0500 Subject: [PATCH] Fix parsing of empty Space responses --- scraper/api_types_v2.go | 5 +++++ scraper/api_types_v2_test.go | 15 +++++++++++++++ scraper/tweet.go | 11 +++++++++++ 3 files changed, 31 insertions(+) diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index 2bb3ff4..0cee0be 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -538,6 +538,11 @@ func (r SpaceResponse) ToTweetTrove() TweetTrove { ret := NewTweetTrove() space := Space{} space.ID = SpaceID(data.Metadata.RestId) + if space.ID == "" { + // The response is empty. Abort processing + return ret + } + space.Title = data.Metadata.Title space.State = data.Metadata.State space.CreatedAt = TimestampFromUnix(data.Metadata.CreatedAt) diff --git a/scraper/api_types_v2_test.go b/scraper/api_types_v2_test.go index 98697c7..fd67719 100644 --- a/scraper/api_types_v2_test.go +++ b/scraper/api_types_v2_test.go @@ -502,6 +502,21 @@ func TestParseSpaceResponse(t *testing.T) { assert.Equal(847, user.FollowersCount) } +func TestParseEmptySpaceResponse(t *testing.T) { + require := require.New(t) + data, err := os.ReadFile("test_responses/tweet_content/space_object_empty.json") + if err != nil { + panic(err) + } + + var response SpaceResponse + err = json.Unmarshal(data, &response) + require.NoError(err) + + trove := response.ToTweetTrove() + require.Len(trove.Spaces, 0) +} + func TestParseAPIV2UserFeed(t *testing.T) { data, err := os.ReadFile("test_responses/api_v2/user_feed_apiv2.json") if err != nil { diff --git a/scraper/tweet.go b/scraper/tweet.go index 8880c42..bc239b9 100644 --- a/scraper/tweet.go +++ b/scraper/tweet.go @@ -4,6 +4,7 @@ import ( "fmt" "strings" "time" + log "github.com/sirupsen/logrus" "offline_twitter/terminal_utils" ) @@ -33,7 +34,12 @@ type Tweet struct { Hashtags []string Urls []Url Polls []Poll + + // TODO get-rid-of-spaces: Might be good to 
get rid of `Spaces`. Only used in APIv1 I think. + // A first step would be to delete the Spaces after pulling them out of a Tweet into the Trove + // in ParseTweetResponse. Then they will only be getting saved once rather than twice. Spaces []Space + SpaceID SpaceID TombstoneType string IsStub bool @@ -185,6 +191,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) { if apiTweet.Card.Name == "3691233323:audiospace" { space := ParseAPISpace(apiTweet.Card) ret.Spaces = []Space{space} + ret.SpaceID = space.ID } // Process tombstones and other metadata @@ -258,6 +265,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) { trove.TombstoneUsers = tombstoned_users // Quoted tombstones need their user_id filled out from the tombstoned_users list + log.Debug("Running tweet trove post-processing\n") err = trove.PostProcess() if err != nil { err = fmt.Errorf("Error getting tweet (id %d):\n %w", id, err) @@ -294,6 +302,9 @@ func ParseTweetResponse(resp TweetResponse) (TweetTrove, error) { return trove, err } trove.Tweets[new_tweet.ID] = new_tweet + for _, space := range new_tweet.Spaces { + trove.Spaces[space.ID] = space + } } else { new_retweet, err := ParseSingleRetweet(single_tweet) if err != nil {