Add parsing of tombstones from api

This commit is contained in:
Alessio 2021-11-01 14:06:15 -07:00
parent 4fa30e246d
commit e8a9c10c89
2 changed files with 119 additions and 0 deletions

View File

@ -1,10 +1,12 @@
package scraper
import (
"fmt"
"time"
"strings"
"encoding/json"
"strconv"
"sort"
)
@ -104,6 +106,7 @@ type APITweet struct {
Media []APIExtendedMedia `json:"media"`
} `json:"extended_entities"`
InReplyToStatusID int64 `json:"in_reply_to_status_id_str,string"`
InReplyToUserID int64 `json:"in_reply_to_user_id_str,string"`
InReplyToScreenName string `json:"in_reply_to_screen_name"`
ReplyCount int `json:"reply_count"`
RetweetCount int `json:"retweet_count"`
@ -119,6 +122,7 @@ type APITweet struct {
Time time.Time `json:"time"`
UserID int64 `json:"user_id_str,string"`
Card APICard `json:"card"`
TombstoneText string
}
func (t *APITweet) NormalizeContent() {
@ -201,6 +205,39 @@ func (u UserResponse) ConvertToAPIUser() APIUser {
return ret
}
// Entry mirrors a single entry object as it is decoded from the API's JSON.
//
// Only the parts needed here are modelled: the entry identifier, its sort
// index (sent by the API as a JSON string and decoded into an int64), the
// tombstone rich text and tweet ID nested under content.item.content, and the
// cursor value nested under content.operation.
type Entry struct {
	// EntryID is decoded from the "entryId" key.
	EntryID string `json:"entryId"`
	// SortIndex is decoded from the string-encoded "sortIndex" key.
	SortIndex int64 `json:"sortIndex,string"`
	// Content is decoded from the "content" key.
	Content struct {
		// Item holds the item payload ("item" key).
		Item struct {
			Content struct {
				// Tombstone carries the placeholder text, found at
				// tombstone.tombstoneInfo.richText.text in the JSON.
				Tombstone struct {
					TombstoneInfo struct {
						RichText struct {
							Text string `json:"text"`
						} `json:"richText"`
					} `json:"tombstoneInfo"`
				} `json:"tombstone"`
				// Tweet carries the string-encoded tweet ID ("tweet.id").
				Tweet struct {
					ID int64 `json:"id,string"`
				} `json:"tweet"`
			} `json:"content"`
		} `json:"item"`
		// Operation holds the cursor value ("operation.cursor.value").
		Operation struct {
			Cursor struct {
				Value string `json:"value"`
			} `json:"cursor"`
		} `json:"operation"`
	} `json:"content"`
}
// GetTombstoneText returns the rich-text string stored on the entry's
// tombstone (Content.Item.Content.Tombstone.TombstoneInfo.RichText.Text).
// If that field was never populated, the result is the empty string.
func (e Entry) GetTombstoneText() string {
	info := e.Content.Item.Content.Tombstone.TombstoneInfo
	return info.RichText.Text
}
// SortableEntries is a slice of Entry that provides the Len, Swap and Less
// methods, ordering entries by SortIndex from largest to smallest.
type SortableEntries []Entry

// Len reports how many entries the slice holds.
func (e SortableEntries) Len() int {
	return len(e)
}

// Swap exchanges the entries at positions i and j.
func (e SortableEntries) Swap(i, j int) {
	held := e[i]
	e[i] = e[j]
	e[j] = held
}

// Less reports whether the entry at i belongs before the entry at j, which is
// the case when it has the strictly larger SortIndex (descending order).
func (e SortableEntries) Less(i, j int) bool {
	return e[j].SortIndex < e[i].SortIndex
}
type TweetResponse struct {
GlobalObjects struct {
Tweets map[string]APITweet `json:"tweets"`

View File

@ -128,3 +128,85 @@ func TestIsEndOfFeed(t *testing.T) {
}
}
}
// TestHandleTombstonesHidden loads the "hidden" tombstone fixture, which
// decodes to 2 tweets, calls HandleTombstones, and then expects 4 tweets: the
// two new ones must carry the expected IDs, the expected user ID, and the
// tombstone text "hidden".
func TestHandleTombstonesHidden(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_hidden_1.json")
	if err != nil {
		// Fail through the testing framework instead of panicking, so the
		// failure is attributed to this test.
		t.Fatalf("reading fixture: %v", err)
	}
	var tweet_resp scraper.TweetResponse
	if err = json.Unmarshal(data, &tweet_resp); err != nil {
		// Never pass err.Error() as the format string: a '%' in the message
		// would be interpreted as a formatting verb.
		t.Fatalf("unmarshalling fixture: %v", err)
	}

	if len(tweet_resp.GlobalObjects.Tweets) != 2 {
		t.Fatalf("Should have started with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
	}
	tweet_resp.HandleTombstones()
	if len(tweet_resp.GlobalObjects.Tweets) != 4 {
		t.Errorf("Should have ended up with %d tweets, but had %d instead", 4, len(tweet_resp.GlobalObjects.Tweets))
	}

	// Typed int64 fields keep the large IDs from defaulting to int, which
	// would overflow on 32-bit platforms.
	tombstones := []struct {
		key  string
		id   int64
		user int64
	}{
		{key: "1454522147750260742", id: 1454522147750260742, user: 1365863538393309184},
		{key: "1454515503242829830", id: 1454515503242829830, user: 1365863538393309184},
	}
	for _, want := range tombstones {
		tombstone, ok := tweet_resp.GlobalObjects.Tweets[want.key]
		if !ok {
			t.Errorf("Missing tombstoned tweet for %s", want.key)
			// Checking fields of the zero value would only add noise.
			continue
		}
		if int64(tombstone.ID) != want.id {
			t.Errorf("Expected ID %d, got %d instead", want.id, tombstone.ID)
		}
		if tombstone.UserID != want.user {
			t.Errorf("Expected UserID %d, got %d instead", want.user, tombstone.UserID)
		}
		if tombstone.TombstoneText != "hidden" {
			t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
		}
	}
}
// TestHandleTombstonesDeleted loads the "deleted" tombstone fixture, which
// decodes to 1 tweet, calls HandleTombstones, and then expects 2 tweets: the
// new one must carry the expected ID, the expected user ID, and the tombstone
// text "deleted".
func TestHandleTombstonesDeleted(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tombstones/tombstone_deleted.json")
	if err != nil {
		// Fail through the testing framework instead of panicking, so the
		// failure is attributed to this test.
		t.Fatalf("reading fixture: %v", err)
	}
	var tweet_resp scraper.TweetResponse
	if err = json.Unmarshal(data, &tweet_resp); err != nil {
		// Never pass err.Error() as the format string: a '%' in the message
		// would be interpreted as a formatting verb.
		t.Fatalf("unmarshalling fixture: %v", err)
	}

	if len(tweet_resp.GlobalObjects.Tweets) != 1 {
		t.Fatalf("Should have started with %d tweets, but had %d instead", 1, len(tweet_resp.GlobalObjects.Tweets))
	}
	tweet_resp.HandleTombstones()
	if len(tweet_resp.GlobalObjects.Tweets) != 2 {
		t.Errorf("Should have ended up with %d tweets, but had %d instead", 2, len(tweet_resp.GlobalObjects.Tweets))
	}

	// Typed int64 values keep the large IDs from defaulting to int, which
	// would overflow on 32-bit platforms.
	const key = "1454521654781136902"
	var (
		id   int64 = 1454521654781136902
		user int64 = 1218687933391298560
	)

	tombstone, ok := tweet_resp.GlobalObjects.Tweets[key]
	if !ok {
		// Nothing further can be checked on a missing entry.
		t.Fatalf("Missing tombstoned tweet for %s", key)
	}
	if int64(tombstone.ID) != id {
		t.Errorf("Expected ID %d, got %d instead", id, tombstone.ID)
	}
	if tombstone.UserID != user {
		t.Errorf("Expected UserID %d, got %d instead", user, tombstone.UserID)
	}
	if tombstone.TombstoneText != "deleted" {
		t.Errorf("Wrong tombstone text: %s", tombstone.TombstoneText)
	}
}