Add scraping notification detail
This commit is contained in:
parent
dc816c6f28
commit
0c620621a6
@ -603,6 +603,12 @@ func get_notifications(how_many int) {
|
|||||||
if err != nil && !errors.Is(err, scraper.END_OF_FEED) {
|
if err != nil && !errors.Is(err, scraper.END_OF_FEED) {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
to_scrape := profile.CheckNotificationScrapesNeeded(trove)
|
||||||
|
trove, err = api.GetNotificationDetailForAll(trove, to_scrape)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
profile.SaveTweetTrove(trove, true, &api)
|
profile.SaveTweetTrove(trove, true, &api)
|
||||||
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
||||||
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
package persistence
|
package persistence
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -12,16 +15,20 @@ func (p Profile) SaveNotification(n Notification) {
|
|||||||
|
|
||||||
// Save the Notification
|
// Save the Notification
|
||||||
_, err = tx.NamedExec(`
|
_, err = tx.NamedExec(`
|
||||||
insert into notifications(id, type, sent_at, sort_index, user_id, action_user_id, action_tweet_id, action_retweet_id)
|
insert into notifications(id, type, sent_at, sort_index, user_id, action_user_id, action_tweet_id, action_retweet_id,
|
||||||
|
has_detail, last_scraped_at)
|
||||||
values (:id, :type, :sent_at, :sort_index, :user_id, nullif(:action_user_id, 0), nullif(:action_tweet_id, 0),
|
values (:id, :type, :sent_at, :sort_index, :user_id, nullif(:action_user_id, 0), nullif(:action_tweet_id, 0),
|
||||||
nullif(:action_retweet_id, 0))
|
nullif(:action_retweet_id, 0), :has_detail, :last_scraped_at)
|
||||||
on conflict do update
|
on conflict do update
|
||||||
set sent_at = max(sent_at, :sent_at),
|
set sent_at = max(sent_at, :sent_at),
|
||||||
sort_index = max(sort_index, :sort_index),
|
sort_index = max(sort_index, :sort_index),
|
||||||
action_user_id = nullif(:action_user_id, 0),
|
action_user_id = nullif(:action_user_id, 0),
|
||||||
action_tweet_id = nullif(:action_tweet_id, 0)
|
action_tweet_id = nullif(:action_tweet_id, 0),
|
||||||
|
has_detail = has_detail or :has_detail,
|
||||||
|
last_scraped_at = max(last_scraped_at, :last_scraped_at)
|
||||||
`, n)
|
`, n)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
fmt.Printf("failed to save notification %#v\n", n)
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,7 +69,7 @@ func (p Profile) GetNotification(id NotificationID) Notification {
|
|||||||
var ret Notification
|
var ret Notification
|
||||||
err := p.DB.Get(&ret,
|
err := p.DB.Get(&ret,
|
||||||
`select id, type, sent_at, sort_index, user_id, ifnull(action_user_id, 0) action_user_id,
|
`select id, type, sent_at, sort_index, user_id, ifnull(action_user_id, 0) action_user_id,
|
||||||
ifnull(action_tweet_id, 0) action_tweet_id, ifnull(action_retweet_id, 0) action_retweet_id
|
ifnull(action_tweet_id, 0) action_tweet_id, ifnull(action_retweet_id, 0) action_retweet_id, has_detail, last_scraped_at
|
||||||
from notifications where id = ?`,
|
from notifications where id = ?`,
|
||||||
id)
|
id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -82,3 +89,29 @@ func (p Profile) GetNotification(id NotificationID) Notification {
|
|||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p Profile) CheckNotificationScrapesNeeded(trove TweetTrove) []NotificationID {
|
||||||
|
ret := []NotificationID{}
|
||||||
|
for n_id, notification := range trove.Notifications {
|
||||||
|
// If there's no detail page, skip
|
||||||
|
if !notification.HasDetail {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check its last-scraped
|
||||||
|
var last_scraped_at Timestamp
|
||||||
|
err := p.DB.Get(&last_scraped_at, `select last_scraped_at from notifications where id = ?`, n_id)
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
// It's not scraped at all yet
|
||||||
|
ret = append(ret, n_id)
|
||||||
|
continue
|
||||||
|
} else if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
// If the latest scrape is not fresh (older than the notification sent-at time), add it
|
||||||
|
if last_scraped_at.Time.Before(notification.SentAt.Time) {
|
||||||
|
ret = append(ret, n_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
@ -414,6 +414,9 @@ create table notifications (rowid integer primary key,
|
|||||||
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
||||||
action_retweet_id integer references retweets(retweet_id),
|
action_retweet_id integer references retweets(retweet_id),
|
||||||
|
|
||||||
|
has_detail boolean not null default 0,
|
||||||
|
last_scraped_at not null default 0,
|
||||||
|
|
||||||
foreign key(type) references notification_types(rowid)
|
foreign key(type) references notification_types(rowid)
|
||||||
foreign key(user_id) references users(id)
|
foreign key(user_id) references users(id)
|
||||||
);
|
);
|
||||||
|
@ -410,6 +410,8 @@ func create_dummy_notification() Notification {
|
|||||||
ActionUserID: create_stable_user().ID,
|
ActionUserID: create_stable_user().ID,
|
||||||
ActionTweetID: create_stable_tweet().ID,
|
ActionTweetID: create_stable_tweet().ID,
|
||||||
ActionRetweetID: create_stable_retweet().RetweetID,
|
ActionRetweetID: create_stable_retweet().RetweetID,
|
||||||
|
HasDetail: true,
|
||||||
|
LastScrapedAt: TimestampFromUnix(57234728),
|
||||||
TweetIDs: []TweetID{create_stable_tweet().ID},
|
TweetIDs: []TweetID{create_stable_tweet().ID},
|
||||||
UserIDs: []UserID{create_stable_user().ID},
|
UserIDs: []UserID{create_stable_user().ID},
|
||||||
RetweetIDs: []TweetID{create_stable_retweet().RetweetID},
|
RetweetIDs: []TweetID{create_stable_retweet().RetweetID},
|
||||||
|
@ -333,6 +333,9 @@ var MIGRATIONS = []string{
|
|||||||
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
||||||
action_retweet_id integer references retweets(retweet_id),
|
action_retweet_id integer references retweets(retweet_id),
|
||||||
|
|
||||||
|
has_detail boolean not null default 0,
|
||||||
|
last_scraped_at not null default 0,
|
||||||
|
|
||||||
foreign key(type) references notification_types(rowid)
|
foreign key(type) references notification_types(rowid)
|
||||||
foreign key(user_id) references users(id)
|
foreign key(user_id) references users(id)
|
||||||
);
|
);
|
||||||
|
@ -2,10 +2,12 @@ package scraper
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@ -56,6 +58,41 @@ func (api *API) GetNotifications(how_many int) (TweetTrove, error) {
|
|||||||
}
|
}
|
||||||
trove.MergeWith(new_trove)
|
trove.MergeWith(new_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return trove, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *API) GetNotificationDetailForAll(trove TweetTrove, to_scrape []NotificationID) (TweetTrove, error) {
|
||||||
|
for _, n_id := range to_scrape {
|
||||||
|
notification := trove.Notifications[n_id]
|
||||||
|
resp, err := api.GetNotificationDetail(notification)
|
||||||
|
if errors.Is(err, ErrRateLimited) {
|
||||||
|
log.Warnf("Rate limited!")
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
return TweetTrove{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the notification detail
|
||||||
|
new_trove, ids, err := resp.ToTweetTroveAsNotificationDetail()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
trove.MergeWith(new_trove)
|
||||||
|
|
||||||
|
// Add the fetched Tweet / Retweet IDs to the notification
|
||||||
|
for _, id := range ids {
|
||||||
|
_, is_retweet := trove.Retweets[id]
|
||||||
|
if is_retweet {
|
||||||
|
notification.RetweetIDs = append(notification.RetweetIDs, id)
|
||||||
|
} else {
|
||||||
|
notification.TweetIDs = append(notification.TweetIDs, id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Update the notification's last_scraped_at
|
||||||
|
notification.LastScrapedAt = Timestamp{time.Now()}
|
||||||
|
trove.Notifications[n_id] = notification
|
||||||
|
}
|
||||||
return trove, nil
|
return trove, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,6 +128,17 @@ func (t *TweetResponse) ToTweetTroveAsNotifications(current_user_id UserID) (Twe
|
|||||||
notification.Type = NOTIFICATION_TYPE_QUOTE_TWEET
|
notification.Type = NOTIFICATION_TYPE_QUOTE_TWEET
|
||||||
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "mentioned") {
|
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "mentioned") {
|
||||||
notification.Type = NOTIFICATION_TYPE_MENTION
|
notification.Type = NOTIFICATION_TYPE_MENTION
|
||||||
|
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "live_broadcast") {
|
||||||
|
// TODO: broadcast
|
||||||
|
notification.Type = NOTIFICATION_TYPE_USER_IS_LIVE
|
||||||
|
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "community_tweet_pinned") {
|
||||||
|
// TODO: communities
|
||||||
|
delete(ret.Notifications, notification.ID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "multiple") {
|
||||||
|
notification.HasDetail = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if entry.Content.Item.Content.Tweet.ID != 0 {
|
if entry.Content.Item.Content.Tweet.ID != 0 {
|
||||||
@ -161,3 +209,39 @@ func ParseSingleNotification(n APINotification) Notification {
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) {
|
||||||
|
url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
query := url.Query()
|
||||||
|
add_tweet_query_params(&query)
|
||||||
|
url.RawQuery = query.Encode()
|
||||||
|
|
||||||
|
var result TweetResponse
|
||||||
|
err = api.do_http(url.String(), "", &result)
|
||||||
|
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *TweetResponse) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) {
|
||||||
|
ids := []TweetID{}
|
||||||
|
ret, err := t.ToTweetTrove()
|
||||||
|
if err != nil {
|
||||||
|
return TweetTrove{}, ids, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the "addEntries" instruction
|
||||||
|
for _, instr := range t.Timeline.Instructions {
|
||||||
|
sort.Sort(instr.AddEntries.Entries)
|
||||||
|
for _, entry := range instr.AddEntries.Entries {
|
||||||
|
if entry.Content.Item.Content.Tweet.ID != 0 {
|
||||||
|
ids = append(ids, TweetID(entry.Content.Item.Content.Tweet.ID))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret, ids, nil
|
||||||
|
}
|
||||||
|
@ -119,6 +119,21 @@ func TestParseNotificationsPage(t *testing.T) {
|
|||||||
assert.Len(notif10.RetweetIDs, 1)
|
assert.Len(notif10.RetweetIDs, 1)
|
||||||
assert.Contains(notif10.RetweetIDs, TweetID(1827183097382654351))
|
assert.Contains(notif10.RetweetIDs, TweetID(1827183097382654351))
|
||||||
|
|
||||||
|
notif11, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRHyMqRjxDY"]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.Equal(NOTIFICATION_TYPE_USER_IS_LIVE, notif11.Type)
|
||||||
|
assert.Equal(UserID(277536867), notif11.ActionUserID)
|
||||||
|
|
||||||
|
// 1 user liked multiple posts
|
||||||
|
notif12, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRESfwtSqvg"]
|
||||||
|
assert.True(is_ok)
|
||||||
|
assert.True(notif12.HasDetail)
|
||||||
|
|
||||||
|
// TODO: communities
|
||||||
|
// notif12, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRHPBNsDH88"]
|
||||||
|
// assert.True(is_ok)
|
||||||
|
// assert.Equal(NOTIFICATION_TYPE_COMMUNITY_PINNED_POST, notif12.Type)
|
||||||
|
|
||||||
// Check users
|
// Check users
|
||||||
for _, u_id := range []UserID{1458284524761075714, 28815778, 1633158398555353096} {
|
for _, u_id := range []UserID{1458284524761075714, 28815778, 1633158398555353096} {
|
||||||
_, is_ok := tweet_trove.Users[u_id]
|
_, is_ok := tweet_trove.Users[u_id]
|
||||||
@ -155,3 +170,25 @@ func TestParseNotificationsEndOfFeed(t *testing.T) {
|
|||||||
|
|
||||||
assert.True(resp.IsEndOfFeed())
|
assert.True(resp.IsEndOfFeed())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseNotificationDetail(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
|
data, err := os.ReadFile("test_responses/notifications/notification_detail.json")
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
var resp TweetResponse
|
||||||
|
err = json.Unmarshal(data, &resp)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
trove, ids, err := resp.ToTweetTroveAsNotificationDetail()
|
||||||
|
require.NoError(err)
|
||||||
|
assert.Len(ids, 2)
|
||||||
|
assert.Contains(ids, TweetID(1827544032714633628))
|
||||||
|
assert.Contains(ids, TweetID(1826743131108487390))
|
||||||
|
|
||||||
|
_, is_ok := trove.Tweets[1826743131108487390]
|
||||||
|
assert.True(is_ok)
|
||||||
|
_, is_ok = trove.Retweets[1827544032714633628]
|
||||||
|
assert.True(is_ok)
|
||||||
|
}
|
||||||
|
@ -52,6 +52,10 @@ type Notification struct {
|
|||||||
ActionTweetID TweetID `db:"action_tweet_id"`
|
ActionTweetID TweetID `db:"action_tweet_id"`
|
||||||
ActionRetweetID TweetID `db:"action_retweet_id"`
|
ActionRetweetID TweetID `db:"action_retweet_id"`
|
||||||
|
|
||||||
|
// Used for "multiple" notifs, like "user liked multiple tweets"
|
||||||
|
HasDetail bool `db:"has_detail"`
|
||||||
|
LastScrapedAt Timestamp `db:"last_scraped_at"`
|
||||||
|
|
||||||
TweetIDs []TweetID
|
TweetIDs []TweetID
|
||||||
UserIDs []UserID
|
UserIDs []UserID
|
||||||
RetweetIDs []TweetID
|
RetweetIDs []TweetID
|
||||||
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user