Add Notifications parser

This commit is contained in:
Alessio 2024-08-25 16:27:49 -07:00
parent 27d8ab39f0
commit eafdbf5ffb
6 changed files with 263 additions and 13 deletions

View File

@ -260,6 +260,37 @@ type APIUser struct {
DoesntExist bool DoesntExist bool
} }
// APINotification is the JSON shape of one value in the "notifications" map of a
// notifications timeline response (see TweetResponse.GlobalObjects.Notifications).
//
// Every user and tweet ID is sent as a JSON string and decoded with the `,string`
// option.  They are declared as int64 rather than int because the IDs are 64-bit
// values: a platform-sized int is only 32 bits wide on 32-bit targets, where
// decoding would fail with an overflow error.
type APINotification struct {
	// ID is the notification's opaque string identifier.
	ID string `json:"id"`

	// TimestampMs is transmitted as a JSON string, hence the `,string` option.
	TimestampMs int64 `json:"timestampMs,string"`

	// Message carries the display text, plus the index ranges inside that text
	// which refer to users.
	Message struct {
		Text     string `json:"text"`
		Entities []struct {
			FromIndex int `json:"fromIndex"`
			ToIndex   int `json:"toIndex"`
			Ref       struct {
				User struct {
					ID int64 `json:"id,string"`
				} `json:"user"`
			} `json:"ref"`
		} `json:"entities"`
	} `json:"message"`

	// Template lists the tweets the notification targets and the users it came from.
	Template struct {
		AggregateUserActionsV1 struct {
			TargetObjects []struct {
				Tweet struct {
					ID int64 `json:"id,string"`
				} `json:"tweet"`
			} `json:"targetObjects"`
			FromUsers []struct {
				User struct {
					ID int64 `json:"id,string"`
				} `json:"user"`
			} `json:"fromUsers"`
		} `json:"aggregateUserActionsV1"`
	} `json:"template"`
}
type UserResponse struct { type UserResponse struct {
Data struct { Data struct {
User struct { User struct {
@ -326,7 +357,15 @@ type Entry struct {
Tweet struct { Tweet struct {
ID int64 `json:"id,string"` ID int64 `json:"id,string"`
} `json:"tweet"` } `json:"tweet"`
Notification struct {
ID string `json:"id"`
FromUsers Int64Slice `json:"fromUsers"`
TargetTweets Int64Slice `json:"targetTweets"`
} `json:"notification"`
} `json:"content"` } `json:"content"`
ClientEventInfo struct {
Element string `json:"element"`
} `json:"clientEventInfo"`
} `json:"item"` } `json:"item"`
Operation struct { Operation struct {
Cursor struct { Cursor struct {
@ -348,8 +387,9 @@ func (e SortableEntries) Less(i, j int) bool { return e[i].SortIndex > e[j].Sort
type TweetResponse struct { type TweetResponse struct {
GlobalObjects struct { GlobalObjects struct {
Tweets map[string]APITweet `json:"tweets"` Tweets map[string]APITweet `json:"tweets"`
Users map[string]APIUser `json:"users"` Users map[string]APIUser `json:"users"`
Notifications map[string]APINotification `json:"notifications"`
} `json:"globalObjects"` } `json:"globalObjects"`
Timeline struct { Timeline struct {
Instructions []struct { Instructions []struct {
@ -458,11 +498,13 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
} }
func (t *TweetResponse) GetCursor() string { func (t *TweetResponse) GetCursor() string {
entries := t.Timeline.Instructions[0].AddEntries.Entries // TODO: is this function used anywhere other than Notifications?
if len(entries) > 0 { for _, instr := range t.Timeline.Instructions {
last_entry := entries[len(entries)-1] if len(instr.AddEntries.Entries) > 0 {
if strings.Contains(last_entry.EntryID, "cursor") { last_entry := instr.AddEntries.Entries[len(instr.AddEntries.Entries)-1]
return last_entry.Content.Operation.Cursor.Value if strings.Contains(last_entry.EntryID, "cursor") {
return last_entry.Content.Operation.Cursor.Value
}
} }
} }
@ -523,6 +565,10 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
} }
ret.Users[new_user.ID] = new_user ret.Users[new_user.ID] = new_user
} }
for _, n := range t.GlobalObjects.Notifications {
new_notification := ParseSingleNotification(n)
ret.Notifications[new_notification.ID] = new_notification
}
return ret, nil return ret, nil
} }

View File

@ -2,6 +2,9 @@ package scraper
import ( import (
"net/url" "net/url"
"strings"
"regexp"
"sort"
) )
func (api API) GetNotifications(cursor string) (TweetResponse, error) { func (api API) GetNotifications(cursor string) (TweetResponse, error) {
@ -19,3 +22,103 @@ func (api API) GetNotifications(cursor string) (TweetResponse, error) {
return result, err return result, err
} }
// notificationEntryIDRegex captures the notification ID out of a timeline entry ID
// containing "notification-<id>".  It is compiled once at package scope instead of
// once per timeline entry.
var notificationEntryIDRegex = regexp.MustCompile(`notification-([\w-]+)`)

// ToTweetTroveAsNotifications converts the response into a TweetTrove and then enriches
// the trove's Notifications using the timeline entries.
//
// It first calls ToTweetTrove; any error from that call is returned with an empty
// TweetTrove.  Then, for every timeline instruction, the entries are sorted (in place,
// using their sort.Interface implementation) and each entry whose EntryID matches
// "notification-<id>" is merged into ret.Notifications:
//
//   - If no notification with that ID exists yet, a new one holding only the ID is created.
//   - SortIndex is copied from the entry.
//   - Type is set to 4 when the entry's client event element contains "replied", or to 11
//     when it contains "recommended"; otherwise the existing Type is kept.
//   - If the entry carries a tweet, ActionTweetID is set to it and ActionUserID to the
//     UserID of that tweet in ret.Tweets (the zero value if the tweet is not in the trove).
//   - If the entry carries a notification item, UserIDs and TweetIDs are replaced by the
//     entry's FromUsers / TargetTweets, and ActionUserID / ActionTweetID end up as the
//     last element of each list (when non-empty).
func (t *TweetResponse) ToTweetTroveAsNotifications() (TweetTrove, error) {
	ret, err := t.ToTweetTrove()
	if err != nil {
		return TweetTrove{}, err
	}

	for _, instr := range t.Timeline.Instructions {
		sort.Sort(instr.AddEntries.Entries)
		for _, entry := range instr.AddEntries.Entries {
			matches := notificationEntryIDRegex.FindStringSubmatch(entry.EntryID)
			if len(matches) < 2 {
				// Not a notification entry
				continue
			}
			notification_id := NotificationID(matches[1])

			notification, is_ok := ret.Notifications[notification_id]
			if !is_ok {
				// Tweet entry (e.g., someone replied to you)
				notification = Notification{ID: notification_id}
			}
			notification.SortIndex = entry.SortIndex

			element := entry.Content.Item.ClientEventInfo.Element
			switch {
			case strings.Contains(element, "replied"):
				notification.Type = 4
			case strings.Contains(element, "recommended"):
				notification.Type = 11
			}

			if entry.Content.Item.Content.Tweet.ID != 0 {
				notification.ActionTweetID = TweetID(entry.Content.Item.Content.Tweet.ID)
				notification.ActionUserID = UserID(ret.Tweets[notification.ActionTweetID].UserID)
			}

			if entry.Content.Item.Content.Notification.ID != "" {
				// The lists are kept non-nil even when empty, as before.
				notification.UserIDs = []UserID{}
				for _, u_id := range entry.Content.Item.Content.Notification.FromUsers {
					notification.UserIDs = append(notification.UserIDs, UserID(u_id))
					notification.ActionUserID = UserID(u_id)
				}
				notification.TweetIDs = []TweetID{}
				for _, t_id := range entry.Content.Item.Content.Notification.TargetTweets {
					notification.TweetIDs = append(notification.TweetIDs, TweetID(t_id))
					notification.ActionTweetID = TweetID(t_id)
				}
			}

			ret.Notifications[notification.ID] = notification
		}
	}
	return ret, nil
}
// ParseSingleNotification converts one raw APINotification into a Notification.
//
// The message's entity ranges are first cut out of the message text, and the remaining
// text is matched against known phrases to determine the Type:
//
//	ends with "followed you"                       -> 5
//	contains "liked"                               -> 1
//	contains "reposted"                            -> 2
//	contains "There was a login to your account"   -> 9
//
// If nothing matches, Type is left at its zero value.  SentAt is derived from
// TimestampMs and UserIDs from the template's FromUsers (always non-nil).  When the
// template has target objects, only the first is used: it becomes ActionRetweetID if the
// stripped text ends with "liked your repost", otherwise it becomes ActionTweetID and the
// sole element of TweetIDs.
//
// The recipient (Notification.UserID) is not set here.
func ParseSingleNotification(n APINotification) Notification {
	ret := Notification{}
	ret.ID = NotificationID(n.ID)

	// Remove the entity spans from the text.  Indices are rune offsets, so the text is
	// converted once and edited as runes.  Entities are walked last-to-first so that
	// cutting one span does not shift the offsets of the spans still to be processed
	// (assumes the API lists entities in ascending position order -- TODO confirm).
	runetext := []rune(n.Message.Text)
	for i := len(n.Message.Entities) - 1; i >= 0; i-- {
		from := n.Message.Entities[i].FromIndex
		to := n.Message.Entities[i].ToIndex
		if from < 0 || to < from || to > len(runetext) {
			// Malformed range: skip it instead of panicking on an out-of-bounds slice
			continue
		}
		runetext = append(runetext[:from], runetext[to:]...)
	}
	text := string(runetext)

	switch {
	case strings.HasSuffix(text, "followed you"):
		ret.Type = 5
	case strings.Contains(text, "liked"):
		ret.Type = 1
	case strings.Contains(text, "reposted"):
		ret.Type = 2
	case strings.Contains(text, "There was a login to your account"):
		ret.Type = 9
	default:
		// TODO: more types?
	}

	ret.SentAt = TimestampFromUnixMilli(n.TimestampMs)
	// TODO: caller should set ret.UserID

	from_users := n.Template.AggregateUserActionsV1.FromUsers
	ret.UserIDs = make([]UserID, 0, len(from_users))
	for _, u := range from_users {
		ret.UserIDs = append(ret.UserIDs, UserID(u.User.ID))
	}

	target_objs := n.Template.AggregateUserActionsV1.TargetObjects
	if len(target_objs) > 0 {
		target_id := TweetID(target_objs[0].Tweet.ID)
		if strings.HasSuffix(text, "liked your repost") {
			// Retweet
			ret.ActionRetweetID = target_id
		} else {
			// Normal tweet
			ret.ActionTweetID = target_id
			ret.TweetIDs = []TweetID{target_id}
		}
	}
	return ret
}

View File

@ -0,0 +1,77 @@
package scraper_test
import (
"testing"
"encoding/json"
"os"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)
// TestParseNotificationsPage loads a saved first page of the notifications timeline,
// parses it with ToTweetTroveAsNotifications, and spot-checks individual notifications,
// the users and tweets that should accompany them, and the bottom cursor.
func TestParseNotificationsPage(t *testing.T) {
	assert := assert.New(t)
	require := require.New(t)

	raw_json, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json")
	require.NoError(err)

	var resp TweetResponse
	err = json.Unmarshal(raw_json, &resp)
	require.NoError(err)

	trove, err := resp.ToTweetTroveAsNotifications()
	require.NoError(err)

	// Look up a notification by ID, recording a (non-fatal) failure if it is absent
	get_notification := func(id NotificationID) Notification {
		notif, is_found := trove.Notifications[id]
		assert.True(is_found)
		return notif
	}

	// Login
	login_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BFN3re-ZsU")
	assert.Equal(9, login_notif.Type)

	// Retweet
	retweet_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BFaOkNV8aw")
	assert.Equal(2, retweet_notif.Type)
	assert.Equal(UserID(1458284524761075714), retweet_notif.ActionUserID)
	assert.Equal(TweetID(1824915465275392037), retweet_notif.ActionTweetID)
	assert.Equal(TimestampFromUnixMilli(1723928739342), retweet_notif.SentAt)

	// Like
	like_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BE-OY688aw")
	assert.Equal(1, like_notif.Type)
	assert.Equal(UserID(1458284524761075714), like_notif.ActionUserID)
	assert.Equal(TweetID(1824915465275392037), like_notif.ActionTweetID)

	// Recommended
	recommended_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BGLlh8UIQs")
	assert.Equal(11, recommended_notif.Type)

	// Followed you
	follow_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BHS11EvITw")
	assert.Equal(5, follow_notif.Type)
	assert.Equal(UserID(28815778), follow_notif.ActionUserID)

	// Like with more than one user attached
	multi_like_notif := get_notification("FKncQJGVgAQAAAABSQ3bEYsN6BE5ujkCepo")
	assert.Equal(1, multi_like_notif.Type)
	assert.Equal(UserID(1458284524761075714), multi_like_notif.ActionUserID)
	assert.Equal(TweetID(1826778617705115868), multi_like_notif.ActionTweetID)
	assert.Contains(multi_like_notif.UserIDs, UserID(1458284524761075714))
	assert.Contains(multi_like_notif.UserIDs, UserID(2694459866))

	// Check users
	expected_user_ids := []UserID{1458284524761075714, 28815778}
	for _, user_id := range expected_user_ids {
		_, is_found := trove.Users[user_id]
		assert.True(is_found)
	}

	// Check tweets
	expected_tweet_ids := []TweetID{1824915465275392037, 1826778617705115868}
	for _, tweet_id := range expected_tweet_ids {
		_, is_found := trove.Tweets[tweet_id]
		assert.True(is_found)
	}

	// Test cursor-bottom
	assert.Equal(
		"DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA",
		resp.GetCursor(),
	)
}

View File

@ -0,0 +1,18 @@
package scraper
type (
	// NotificationID is the string identifier of a notification.
	NotificationID string

	// Notification is the parsed form of a single entry from the notifications timeline.
	Notification struct {
		// ID identifies the notification.
		ID NotificationID

		// Type is a numeric code for the kind of notification.  The parser in this
		// package assigns 1 (like), 2 (repost), 4 (reply), 5 (follow), 9 (login) and
		// 11 (recommended); it stays 0 when the kind is not recognized.
		Type int

		// SentAt is when the notification was sent.
		SentAt Timestamp

		// SortIndex is the sort index of the timeline entry the notification came from.
		SortIndex int64

		// UserID is the recipient of the notification.
		UserID UserID

		// ActionUserID is the user associated with the action.
		ActionUserID UserID

		// ActionTweetID is the tweet associated with the action.
		ActionTweetID TweetID

		// ActionRetweetID is set instead of ActionTweetID when the target is a retweet.
		ActionRetweetID TweetID

		// TweetIDs lists the tweets attached to the notification.
		TweetIDs []TweetID

		// UserIDs lists the users attached to the notification.
		UserIDs []UserID
	}
)

File diff suppressed because one or more lines are too long

View File

@ -8,12 +8,13 @@ import (
) )
type TweetTrove struct { type TweetTrove struct {
Tweets map[TweetID]Tweet Tweets map[TweetID]Tweet
Users map[UserID]User Users map[UserID]User
Retweets map[TweetID]Retweet Retweets map[TweetID]Retweet
Spaces map[SpaceID]Space Spaces map[SpaceID]Space
Likes map[LikeSortID]Like Likes map[LikeSortID]Like
Bookmarks map[BookmarkSortID]Bookmark Bookmarks map[BookmarkSortID]Bookmark
Notifications map[NotificationID]Notification
TombstoneUsers []UserHandle TombstoneUsers []UserHandle
@ -30,6 +31,7 @@ func NewTweetTrove() TweetTrove {
ret.Spaces = make(map[SpaceID]Space) ret.Spaces = make(map[SpaceID]Space)
ret.Likes = make(map[LikeSortID]Like) ret.Likes = make(map[LikeSortID]Like)
ret.Bookmarks = make(map[BookmarkSortID]Bookmark) ret.Bookmarks = make(map[BookmarkSortID]Bookmark)
ret.Notifications = make(map[NotificationID]Notification)
ret.TombstoneUsers = []UserHandle{} ret.TombstoneUsers = []UserHandle{}
ret.Rooms = make(map[DMChatRoomID]DMChatRoom) ret.Rooms = make(map[DMChatRoomID]DMChatRoom)
ret.Messages = make(map[DMMessageID]DMMessage) ret.Messages = make(map[DMMessageID]DMMessage)
@ -70,6 +72,9 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
for id, val := range t2.Bookmarks { for id, val := range t2.Bookmarks {
t1.Bookmarks[id] = val t1.Bookmarks[id] = val
} }
for id, val := range t2.Notifications {
t1.Notifications[id] = val
}
t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...) t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...)