From e8a9c10c89631b725e4ecd9bc35c02e3c379067d Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 1 Nov 2021 14:06:15 -0700 Subject: [PATCH] Add parsing of tombstones from api --- scraper/api_types.go | 37 ++++++++++++++++++ scraper/api_types_test.go | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/scraper/api_types.go b/scraper/api_types.go index dda8726..8d4ecc8 100644 --- a/scraper/api_types.go +++ b/scraper/api_types.go @@ -1,10 +1,12 @@ package scraper import ( + "fmt" "time" "strings" "encoding/json" "strconv" + "sort" ) @@ -104,6 +106,7 @@ type APITweet struct { Media []APIExtendedMedia `json:"media"` } `json:"extended_entities"` InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"` + InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"` InReplyToScreenName string `json:"in_reply_to_screen_name"` ReplyCount int `json:"reply_count"` RetweetCount int `json:"retweet_count"` @@ -119,6 +122,7 @@ type APITweet struct { Time time.Time `json:"time"` UserID int64 `json:"user_id_str,string"` Card APICard `json:"card"` + TombstoneText string } func (t *APITweet) NormalizeContent() { @@ -201,6 +205,39 @@ func (u UserResponse) ConvertToAPIUser() APIUser { return ret } +type Entry struct { + EntryID string `json:"entryId"` + SortIndex int64 `json:"sortIndex,string"` + Content struct { + Item struct { + Content struct { + Tombstone struct { + TombstoneInfo struct { + RichText struct { + Text string `json:"text"` + } `json:"richText"` + } `json:"tombstoneInfo"` + } `json:"tombstone"` + Tweet struct { + ID int64 `json:"id,string"` + } `json:"tweet"` + } `json:"content"` + } `json:"item"` + Operation struct { + Cursor struct { + Value string `json:"value"` + } `json:"cursor"` + } `json:"operation"` + } `json:"content"` +} +func (e Entry) GetTombstoneText() string { + return e.Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text +} +type SortableEntries []Entry +func (e SortableEntries) Len() int { return len(e) } +func (e SortableEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } +func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].SortIndex } + type TweetResponse struct { GlobalObjects struct { Tweets map[string]APITweet `json:"tweets"` diff --git a/scraper/api_types_test.go b/scraper/api_types_test.go index bbb1c79..474aeb1 100644 --- a/scraper/api_types_test.go +++ b/scraper/api_types_test.go @@ -128,3 +128,85 @@ func TestIsEndOfFeed(t *testing.T) { } } } + + +func TestHandleTombstonesHidden(t *testing.T) { + data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json") + if err != nil { + panic(err) + } + var tweet_resp scraper.TweetResponse + err = json.Unmarshal(data, &tweet_resp) + if err != nil { + t.Fatalf(err.Error()) + } + if len(tweet_resp.GlobalObjects.Tweets) != 2 { + t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) + } + tweet_resp.HandleTombstones() + if len(tweet_resp.GlobalObjects.Tweets) != 4 { + t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets)) + } + + first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"] + if !ok { + t.Errorf("Missing tombstoned tweet for %s", "1454522147750260742") + } + if first_tombstone.ID != 1454522147750260742 { + t.Errorf("Expected ID %d, got %d instead", 1454522147750260742, first_tombstone.ID) + } + if first_tombstone.UserID != 1365863538393309184 { + t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, first_tombstone.UserID) + } + if first_tombstone.TombstoneText != "hidden" { + t.Errorf("Wrong tombstone text: %s", first_tombstone.TombstoneText) + } + + second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"] + if !ok { + t.Errorf("Missing tombstoned tweet for %s", "1454515503242829830") + } + if second_tombstone.ID != 1454515503242829830 { + t.Errorf("Expected ID %d, got %d instead", 1454515503242829830, second_tombstone.ID) + } + if second_tombstone.UserID != 1365863538393309184 { + t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, second_tombstone.UserID) + } + if second_tombstone.TombstoneText != "hidden" { + t.Errorf("Wrong tombstone text: %s", second_tombstone.TombstoneText) + } +} + +func TestHandleTombstonesDeleted(t *testing.T) { + data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json") + if err != nil { + panic(err) + } + var tweet_resp scraper.TweetResponse + err = json.Unmarshal(data, &tweet_resp) + if err != nil { + t.Fatalf(err.Error()) + } + + if len(tweet_resp.GlobalObjects.Tweets) != 1 { + t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets)) + } + tweet_resp.HandleTombstones() + if len(tweet_resp.GlobalObjects.Tweets) != 2 { + t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets)) + } + + tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"] + if !ok { + t.Errorf("Missing tombstoned tweet for %s", "1454521654781136902") + } + if tombstone.ID != 1454521654781136902 { + t.Errorf("Expected ID %d, got %d instead", 1454521654781136902, tombstone.ID) + } + if tombstone.UserID != 1218687933391298560 { + t.Errorf("Expected UserID %d, got %d instead", 1218687933391298560, tombstone.UserID) + } + if tombstone.TombstoneText != "deleted" { + t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText) + } +}