Add parsing of tombstones from api

2021-11-01 14:06:15 -07:00 · 2021-11-01 14:06:15 -07:00 · e8a9c10c89
commit e8a9c10c89
parent 4fa30e246d
2 changed files with 119 additions and 0 deletions
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -1,10 +1,12 @@
 package scraper

 import (
+	"fmt"
 	"time"
 	"strings"
 	"encoding/json"
 	"strconv"
+	"sort"
 )


@@ -104,6 +106,7 @@ type APITweet struct {
 		Media []APIExtendedMedia `json:"media"`
 	} `json:"extended_entities"`
 	InReplyToStatusID     int64     `json:"in_reply_to_status_id_str,string"`
+	InReplyToUserID       int64     `json:"in_reply_to_user_id_str,string"`
 	InReplyToScreenName   string    `json:"in_reply_to_screen_name"`
 	ReplyCount            int       `json:"reply_count"`
 	RetweetCount          int       `json:"retweet_count"`
@@ -119,6 +122,7 @@ type APITweet struct {
 	Time                  time.Time `json:"time"`
 	UserID                int64     `json:"user_id_str,string"`
 	Card                  APICard   `json:"card"`
+	TombstoneText         string
 }

 func (t *APITweet) NormalizeContent() {
@@ -201,6 +205,39 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
 	return ret
 }

+// Entry mirrors a JSON object with "entryId", "sortIndex" and "content" keys.
+// Only the nested fields declared below are decoded; any other keys in the
+// object are ignored by encoding/json.
+// NOTE(review): presumably one element of the API's timeline entry list --
+// confirm against the response format.
+type Entry struct {
+	EntryID string `json:"entryId"`
+	// The ",string" option tells encoding/json that the number arrives quoted
+	// as a JSON string.
+	SortIndex int64 `json:"sortIndex,string"`
+	Content struct {
+		Item struct {
+			Content struct {
+				// Tombstone holds the text that GetTombstoneText returns
+				// (tombstone.tombstoneInfo.richText.text).
+				Tombstone struct {
+					TombstoneInfo struct {
+						RichText struct {
+							Text string `json:"text"`
+						} `json:"richText"`
+					} `json:"tombstoneInfo"`
+				} `json:"tombstone"`
+				Tweet struct {
+					// ID is also transmitted as a quoted JSON string.
+					ID int64 `json:"id,string"`
+				} `json:"tweet"`
+			} `json:"content"`
+		} `json:"item"`
+		// NOTE(review): presumably a pagination cursor -- verify against callers.
+		Operation struct {
+			Cursor struct {
+				Value string `json:"value"`
+			} `json:"cursor"`
+		} `json:"operation"`
+	} `json:"content"`
+}
+// GetTombstoneText returns the string stored at
+// Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text, which is the
+// zero value "" if that field was never populated.
+func (e Entry) GetTombstoneText() string {
+	info := e.Content.Item.Content.Tombstone.TombstoneInfo
+	return info.RichText.Text
+}
+// SortableEntries is a slice of Entry that provides the Len, Swap and Less
+// methods required by sort.Interface. Less orders entries by SortIndex,
+// largest first.
+type SortableEntries []Entry
+
+// Len reports how many entries the slice holds.
+func (e SortableEntries) Len() int {
+	return len(e)
+}
+
+// Swap exchanges the entries at positions i and j.
+func (e SortableEntries) Swap(i, j int) {
+	e[i], e[j] = e[j], e[i]
+}
+
+// Less reports whether the entry at i has a strictly greater SortIndex than
+// the entry at j, which yields a descending order when sorted.
+func (e SortableEntries) Less(i, j int) bool {
+	return e[j].SortIndex < e[i].SortIndex
+}
+
 type TweetResponse struct {
 	GlobalObjects struct {
 		Tweets map[string]APITweet `json:"tweets"`
--- a/scraper/api_types_test.go
+++ b/scraper/api_types_test.go
@@ -128,3 +128,85 @@ func TestIsEndOfFeed(t *testing.T) {
 		}
 	}
 }
+
+
+// TestHandleTombstonesHidden decodes the tombstone_hidden_1.json fixture, runs
+// HandleTombstones on it, and checks that the tweet map goes from 2 to 4
+// entries and that the two expected tombstoned tweets are present with the
+// right ID, UserID and a TombstoneText of "hidden".
+func TestHandleTombstonesHidden(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
+	if err != nil {
+		// Fail through the test framework instead of panicking, so an
+		// unreadable fixture fails this test without aborting the test binary.
+		t.Fatalf("Couldn't read test fixture: %v", err)
+	}
+	var tweet_resp scraper.TweetResponse
+	err = json.Unmarshal(data, &tweet_resp)
+	if err != nil {
+		// Pass the error as a value: using its text as a format string would
+		// misinterpret any '%' it contains.
+		t.Fatal(err)
+	}
+	if len(tweet_resp.GlobalObjects.Tweets) != 2 {
+		t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
+	}
+	tweet_resp.HandleTombstones()
+	if len(tweet_resp.GlobalObjects.Tweets) != 4 {
+		t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets))
+	}
+
+	first_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454522147750260742"]
+	if !ok {
+		t.Errorf("Missing tombstoned tweet for %s", "1454522147750260742")
+	}
+	if first_tombstone.ID != 1454522147750260742 {
+		t.Errorf("Expected ID %d, got %d instead", 1454522147750260742, first_tombstone.ID)
+	}
+	if first_tombstone.UserID != 1365863538393309184 {
+		t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, first_tombstone.UserID)
+	}
+	if first_tombstone.TombstoneText != "hidden" {
+		t.Errorf("Wrong tombstone text: %s", first_tombstone.TombstoneText)
+	}
+
+	second_tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454515503242829830"]
+	if !ok {
+		t.Errorf("Missing tombstoned tweet for %s", "1454515503242829830")
+	}
+	if second_tombstone.ID != 1454515503242829830 {
+		t.Errorf("Expected ID %d, got %d instead", 1454515503242829830, second_tombstone.ID)
+	}
+	if second_tombstone.UserID != 1365863538393309184 {
+		t.Errorf("Expected UserID %d, got %d instead", 1365863538393309184, second_tombstone.UserID)
+	}
+	if second_tombstone.TombstoneText != "hidden" {
+		t.Errorf("Wrong tombstone text: %s", second_tombstone.TombstoneText)
+	}
+}
+
+// TestHandleTombstonesDeleted decodes the tombstone_deleted.json fixture, runs
+// HandleTombstones on it, and checks that the tweet map goes from 1 to 2
+// entries and that the expected tombstoned tweet is present with the right
+// ID, UserID and a TombstoneText of "deleted".
+func TestHandleTombstonesDeleted(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
+	if err != nil {
+		// Fail through the test framework instead of panicking, so an
+		// unreadable fixture fails this test without aborting the test binary.
+		t.Fatalf("Couldn't read test fixture: %v", err)
+	}
+	var tweet_resp scraper.TweetResponse
+	err = json.Unmarshal(data, &tweet_resp)
+	if err != nil {
+		// Pass the error as a value: using its text as a format string would
+		// misinterpret any '%' it contains.
+		t.Fatal(err)
+	}
+
+	if len(tweet_resp.GlobalObjects.Tweets) != 1 {
+		t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets))
+	}
+	tweet_resp.HandleTombstones()
+	if len(tweet_resp.GlobalObjects.Tweets) != 2 {
+		t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
+	}
+
+	tombstone, ok := tweet_resp.GlobalObjects.Tweets["1454521654781136902"]
+	if !ok {
+		t.Errorf("Missing tombstoned tweet for %s", "1454521654781136902")
+	}
+	if tombstone.ID != 1454521654781136902 {
+		t.Errorf("Expected ID %d, got %d instead", 1454521654781136902, tombstone.ID)
+	}
+	if tombstone.UserID != 1218687933391298560 {
+		t.Errorf("Expected UserID %d, got %d instead", 1218687933391298560, tombstone.UserID)
+	}
+	if tombstone.TombstoneText != "deleted" {
+		t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
+	}
+}