Add Notifications parser
This commit is contained in:
parent
27d8ab39f0
commit
eafdbf5ffb
@ -260,6 +260,37 @@ type APIUser struct {
|
||||
DoesntExist bool
|
||||
}
|
||||
|
||||
// APINotification is the raw JSON shape of a single notification object, as found in the
// `globalObjects.notifications` map of a timeline response.
//
// Numeric IDs and the timestamp arrive as JSON strings, hence the `,string` tag option.
// IDs are decoded into int64 (matching the tweet ID in `Entry`) rather than the
// platform-sized `int`, because these IDs do not fit in 32 bits.
type APINotification struct {
	ID          string `json:"id"`
	TimestampMs int64  `json:"timestampMs,string"`

	// Message is the human-readable notification text.  Each entity marks a span of
	// the text (FromIndex inclusive, ToIndex exclusive) that refers to a user.
	Message struct {
		Text     string `json:"text"`
		Entities []struct {
			FromIndex int `json:"fromIndex"`
			ToIndex   int `json:"toIndex"`
			Ref       struct {
				User struct {
					ID int64 `json:"id,string"`
				} `json:"user"`
			} `json:"ref"`
		} `json:"entities"`
	} `json:"message"`

	// Template lists the tweets the notification is about and the users who acted.
	Template struct {
		AggregateUserActionsV1 struct {
			TargetObjects []struct {
				Tweet struct {
					ID int64 `json:"id,string"`
				} `json:"tweet"`
			} `json:"targetObjects"`
			FromUsers []struct {
				User struct {
					ID int64 `json:"id,string"`
				} `json:"user"`
			} `json:"fromUsers"`
		} `json:"aggregateUserActionsV1"`
	} `json:"template"`
}
|
||||
|
||||
type UserResponse struct {
|
||||
Data struct {
|
||||
User struct {
|
||||
@ -326,7 +357,15 @@ type Entry struct {
|
||||
Tweet struct {
|
||||
ID int64 `json:"id,string"`
|
||||
} `json:"tweet"`
|
||||
Notification struct {
|
||||
ID string `json:"id"`
|
||||
FromUsers Int64Slice `json:"fromUsers"`
|
||||
TargetTweets Int64Slice `json:"targetTweets"`
|
||||
} `json:"notification"`
|
||||
} `json:"content"`
|
||||
ClientEventInfo struct {
|
||||
Element string `json:"element"`
|
||||
} `json:"clientEventInfo"`
|
||||
} `json:"item"`
|
||||
Operation struct {
|
||||
Cursor struct {
|
||||
@ -350,6 +389,7 @@ type TweetResponse struct {
|
||||
GlobalObjects struct {
|
||||
Tweets map[string]APITweet `json:"tweets"`
|
||||
Users map[string]APIUser `json:"users"`
|
||||
Notifications map[string]APINotification `json:"notifications"`
|
||||
} `json:"globalObjects"`
|
||||
Timeline struct {
|
||||
Instructions []struct {
|
||||
@ -458,13 +498,15 @@ func (t *TweetResponse) HandleTombstones() []UserHandle {
|
||||
}
|
||||
|
||||
func (t *TweetResponse) GetCursor() string {
|
||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||
if len(entries) > 0 {
|
||||
last_entry := entries[len(entries)-1]
|
||||
// TODO: is this function used anywhere other than Notifications?
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
if len(instr.AddEntries.Entries) > 0 {
|
||||
last_entry := instr.AddEntries.Entries[len(instr.AddEntries.Entries)-1]
|
||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||
return last_entry.Content.Operation.Cursor.Value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next, try the other format ("replaceEntry")
|
||||
instructions := t.Timeline.Instructions
|
||||
@ -523,6 +565,10 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
|
||||
}
|
||||
ret.Users[new_user.ID] = new_user
|
||||
}
|
||||
for _, n := range t.GlobalObjects.Notifications {
|
||||
new_notification := ParseSingleNotification(n)
|
||||
ret.Notifications[new_notification.ID] = new_notification
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,9 @@ package scraper
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"regexp"
|
||||
"sort"
|
||||
)
|
||||
|
||||
func (api API) GetNotifications(cursor string) (TweetResponse, error) {
|
||||
@ -19,3 +22,103 @@ func (api API) GetNotifications(cursor string) (TweetResponse, error) {
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TweetResponse) ToTweetTroveAsNotifications() (TweetTrove, error) {
|
||||
ret, err := t.ToTweetTrove()
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
|
||||
// Find the "addEntries" instruction
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
sort.Sort(instr.AddEntries.Entries)
|
||||
for _, entry := range instr.AddEntries.Entries {
|
||||
id_re := regexp.MustCompile(`notification-([\w-]+)`)
|
||||
matches := id_re.FindStringSubmatch(entry.EntryID)
|
||||
if matches == nil || len(matches) == 1 {
|
||||
// Not a notification entry
|
||||
continue
|
||||
}
|
||||
notification_id := matches[1]
|
||||
notification, is_ok := ret.Notifications[NotificationID(notification_id)]
|
||||
if !is_ok {
|
||||
// Tweet entry (e.g., someone replied to you)
|
||||
notification = Notification{ID: NotificationID(notification_id)}
|
||||
}
|
||||
notification.SortIndex = entry.SortIndex
|
||||
if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "replied") {
|
||||
notification.Type = 4
|
||||
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "recommended") {
|
||||
notification.Type = 11
|
||||
}
|
||||
if entry.Content.Item.Content.Tweet.ID != 0 {
|
||||
notification.ActionTweetID = TweetID(entry.Content.Item.Content.Tweet.ID)
|
||||
notification.ActionUserID = UserID(ret.Tweets[notification.ActionTweetID].UserID)
|
||||
}
|
||||
|
||||
if entry.Content.Item.Content.Notification.ID != "" {
|
||||
notification.UserIDs = []UserID{}
|
||||
for _, u_id := range entry.Content.Item.Content.Notification.FromUsers {
|
||||
notification.UserIDs = append(notification.UserIDs, UserID(u_id))
|
||||
notification.ActionUserID = UserID(u_id)
|
||||
}
|
||||
|
||||
notification.TweetIDs = []TweetID{}
|
||||
for _, t_id := range entry.Content.Item.Content.Notification.TargetTweets {
|
||||
notification.TweetIDs = append(notification.TweetIDs, TweetID(t_id))
|
||||
notification.ActionTweetID = TweetID(t_id)
|
||||
}
|
||||
}
|
||||
ret.Notifications[notification.ID] = notification
|
||||
}
|
||||
}
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func ParseSingleNotification(n APINotification) Notification {
|
||||
ret := Notification{}
|
||||
ret.ID = NotificationID(n.ID)
|
||||
|
||||
for i := len(n.Message.Entities) - 1; i >= 0; i -= 1 {
|
||||
from := n.Message.Entities[i].FromIndex
|
||||
to := n.Message.Entities[i].ToIndex
|
||||
|
||||
runetext := []rune(n.Message.Text)
|
||||
|
||||
n.Message.Text = string(runetext[0:from]) + string(runetext[to:])
|
||||
}
|
||||
// t.Entities.ReplyMentions = strings.TrimSpace(string([]rune(t.FullText)[0:t.DisplayTextRange[0]]))
|
||||
|
||||
if strings.HasSuffix(n.Message.Text, "followed you") {
|
||||
ret.Type = 5
|
||||
} else if strings.Contains(n.Message.Text, "liked") {
|
||||
ret.Type = 1
|
||||
} else if strings.Contains(n.Message.Text, "reposted") {
|
||||
ret.Type = 2
|
||||
} else if strings.Contains(n.Message.Text, "There was a login to your account") {
|
||||
ret.Type = 9
|
||||
} else {
|
||||
// TODO: more types?
|
||||
}
|
||||
|
||||
ret.SentAt = TimestampFromUnixMilli(n.TimestampMs)
|
||||
// TODO: caller should set ret.UserID
|
||||
ret.UserIDs = []UserID{}
|
||||
for _, u := range n.Template.AggregateUserActionsV1.FromUsers {
|
||||
ret.UserIDs = append(ret.UserIDs, UserID(u.User.ID))
|
||||
}
|
||||
|
||||
target_objs := n.Template.AggregateUserActionsV1.TargetObjects
|
||||
if len(target_objs) > 0 {
|
||||
if strings.HasSuffix(n.Message.Text, "liked your repost") {
|
||||
// Retweet
|
||||
ret.ActionRetweetID = TweetID(target_objs[0].Tweet.ID)
|
||||
} else {
|
||||
// Normal tweet
|
||||
ret.ActionTweetID = TweetID(target_objs[0].Tweet.ID)
|
||||
ret.TweetIDs = []TweetID{TweetID(target_objs[0].Tweet.ID)}
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
77
pkg/scraper/api_types_notifications_test.go
Normal file
77
pkg/scraper/api_types_notifications_test.go
Normal file
@ -0,0 +1,77 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
|
||||
func TestParseNotificationsPage(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
data, err := os.ReadFile("test_responses/notifications/notifications_response_first_page.json")
|
||||
require.NoError(err)
|
||||
|
||||
var resp TweetResponse
|
||||
err = json.Unmarshal(data, &resp)
|
||||
require.NoError(err)
|
||||
|
||||
tweet_trove, err := resp.ToTweetTroveAsNotifications()
|
||||
require.NoError(err)
|
||||
|
||||
notif1, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BFN3re-ZsU"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(9, notif1.Type) // login
|
||||
|
||||
notif2, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BFaOkNV8aw"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(2, notif2.Type) // retweet
|
||||
assert.Equal(UserID(1458284524761075714), notif2.ActionUserID)
|
||||
assert.Equal(TweetID(1824915465275392037), notif2.ActionTweetID)
|
||||
assert.Equal(TimestampFromUnixMilli(1723928739342), notif2.SentAt)
|
||||
|
||||
notif3, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BE-OY688aw"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(1, notif3.Type) // like
|
||||
assert.Equal(UserID(1458284524761075714), notif3.ActionUserID)
|
||||
assert.Equal(TweetID(1824915465275392037), notif3.ActionTweetID)
|
||||
|
||||
notif4, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BGLlh8UIQs"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(11, notif4.Type) // recommended
|
||||
|
||||
notif5, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BHS11EvITw"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(5, notif5.Type) // followed you
|
||||
assert.Equal(UserID(28815778), notif5.ActionUserID)
|
||||
|
||||
notif6, is_ok := tweet_trove.Notifications["FKncQJGVgAQAAAABSQ3bEYsN6BE5ujkCepo"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(1, notif6.Type)
|
||||
assert.Equal(UserID(1458284524761075714), notif6.ActionUserID)
|
||||
assert.Equal(TweetID(1826778617705115868), notif6.ActionTweetID)
|
||||
assert.Contains(notif6.UserIDs, UserID(1458284524761075714))
|
||||
assert.Contains(notif6.UserIDs, UserID(2694459866))
|
||||
|
||||
// Check users
|
||||
for _, u_id := range []UserID{1458284524761075714, 28815778} {
|
||||
_, is_ok := tweet_trove.Users[u_id]
|
||||
assert.True(is_ok)
|
||||
}
|
||||
|
||||
// Check tweets
|
||||
for _, t_id := range []TweetID{1824915465275392037, 1826778617705115868} {
|
||||
_, is_ok := tweet_trove.Tweets[t_id]
|
||||
assert.True(is_ok)
|
||||
}
|
||||
|
||||
// Test cursor-bottom
|
||||
bottom_cursor := resp.GetCursor()
|
||||
assert.Equal("DAACDAABCgABFKncQJGVgAQIAAIAAAABCAADSQ3bEQgABIsN6BEACwACAAAAC0FaRkxRSXFNLTJJAAA", bottom_cursor)
|
||||
}
|
18
pkg/scraper/notification.go
Normal file
18
pkg/scraper/notification.go
Normal file
@ -0,0 +1,18 @@
|
||||
package scraper
|
||||
|
||||
// NotificationID is the string identifier of a notification, as given by the API in the
// notification's `id` field and in "notification-<id>" timeline entry IDs.
type NotificationID string
|
||||
|
||||
type Notification struct {
|
||||
ID NotificationID
|
||||
Type int
|
||||
SentAt Timestamp
|
||||
SortIndex int64
|
||||
UserID UserID // recipient of the notification
|
||||
|
||||
ActionUserID UserID
|
||||
ActionTweetID TweetID
|
||||
ActionRetweetID TweetID
|
||||
|
||||
TweetIDs []TweetID
|
||||
UserIDs []UserID
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -14,6 +14,7 @@ type TweetTrove struct {
|
||||
Spaces map[SpaceID]Space
|
||||
Likes map[LikeSortID]Like
|
||||
Bookmarks map[BookmarkSortID]Bookmark
|
||||
Notifications map[NotificationID]Notification
|
||||
|
||||
TombstoneUsers []UserHandle
|
||||
|
||||
@ -30,6 +31,7 @@ func NewTweetTrove() TweetTrove {
|
||||
ret.Spaces = make(map[SpaceID]Space)
|
||||
ret.Likes = make(map[LikeSortID]Like)
|
||||
ret.Bookmarks = make(map[BookmarkSortID]Bookmark)
|
||||
ret.Notifications = make(map[NotificationID]Notification)
|
||||
ret.TombstoneUsers = []UserHandle{}
|
||||
ret.Rooms = make(map[DMChatRoomID]DMChatRoom)
|
||||
ret.Messages = make(map[DMMessageID]DMMessage)
|
||||
@ -70,6 +72,9 @@ func (t1 *TweetTrove) MergeWith(t2 TweetTrove) {
|
||||
for id, val := range t2.Bookmarks {
|
||||
t1.Bookmarks[id] = val
|
||||
}
|
||||
for id, val := range t2.Notifications {
|
||||
t1.Notifications[id] = val
|
||||
}
|
||||
|
||||
t1.TombstoneUsers = append(t1.TombstoneUsers, t2.TombstoneUsers...)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user