Add scraping notification detail
This commit is contained in:
parent
dc816c6f28
commit
0c620621a6
@ -603,6 +603,12 @@ func get_notifications(how_many int) {
|
||||
if err != nil && !errors.Is(err, scraper.END_OF_FEED) {
|
||||
panic(err)
|
||||
}
|
||||
to_scrape := profile.CheckNotificationScrapesNeeded(trove)
|
||||
trove, err = api.GetNotificationDetailForAll(trove, to_scrape)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
profile.SaveTweetTrove(trove, true, &api)
|
||||
happy_exit(fmt.Sprintf("Saved %d notifications, %d tweets and %d users",
|
||||
len(trove.Notifications), len(trove.Tweets), len(trove.Users),
|
||||
|
@ -1,6 +1,9 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
)
|
||||
|
||||
@ -12,16 +15,20 @@ func (p Profile) SaveNotification(n Notification) {
|
||||
|
||||
// Save the Notification
|
||||
_, err = tx.NamedExec(`
|
||||
insert into notifications(id, type, sent_at, sort_index, user_id, action_user_id, action_tweet_id, action_retweet_id)
|
||||
insert into notifications(id, type, sent_at, sort_index, user_id, action_user_id, action_tweet_id, action_retweet_id,
|
||||
has_detail, last_scraped_at)
|
||||
values (:id, :type, :sent_at, :sort_index, :user_id, nullif(:action_user_id, 0), nullif(:action_tweet_id, 0),
|
||||
nullif(:action_retweet_id, 0))
|
||||
nullif(:action_retweet_id, 0), :has_detail, :last_scraped_at)
|
||||
on conflict do update
|
||||
set sent_at = max(sent_at, :sent_at),
|
||||
sort_index = max(sort_index, :sort_index),
|
||||
action_user_id = nullif(:action_user_id, 0),
|
||||
action_tweet_id = nullif(:action_tweet_id, 0)
|
||||
action_tweet_id = nullif(:action_tweet_id, 0),
|
||||
has_detail = has_detail or :has_detail,
|
||||
last_scraped_at = max(last_scraped_at, :last_scraped_at)
|
||||
`, n)
|
||||
if err != nil {
|
||||
fmt.Printf("failed to save notification %#v\n", n)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
@ -62,7 +69,7 @@ func (p Profile) GetNotification(id NotificationID) Notification {
|
||||
var ret Notification
|
||||
err := p.DB.Get(&ret,
|
||||
`select id, type, sent_at, sort_index, user_id, ifnull(action_user_id, 0) action_user_id,
|
||||
ifnull(action_tweet_id, 0) action_tweet_id, ifnull(action_retweet_id, 0) action_retweet_id
|
||||
ifnull(action_tweet_id, 0) action_tweet_id, ifnull(action_retweet_id, 0) action_retweet_id, has_detail, last_scraped_at
|
||||
from notifications where id = ?`,
|
||||
id)
|
||||
if err != nil {
|
||||
@ -82,3 +89,29 @@ func (p Profile) GetNotification(id NotificationID) Notification {
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (p Profile) CheckNotificationScrapesNeeded(trove TweetTrove) []NotificationID {
|
||||
ret := []NotificationID{}
|
||||
for n_id, notification := range trove.Notifications {
|
||||
// If there's no detail page, skip
|
||||
if !notification.HasDetail {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check its last-scraped
|
||||
var last_scraped_at Timestamp
|
||||
err := p.DB.Get(&last_scraped_at, `select last_scraped_at from notifications where id = ?`, n_id)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
// It's not scraped at all yet
|
||||
ret = append(ret, n_id)
|
||||
continue
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// If the latest scrape is not fresh (older than the notification sent-at time), add it
|
||||
if last_scraped_at.Time.Before(notification.SentAt.Time) {
|
||||
ret = append(ret, n_id)
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
@ -414,6 +414,9 @@ create table notifications (rowid integer primary key,
|
||||
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
||||
action_retweet_id integer references retweets(retweet_id),
|
||||
|
||||
has_detail boolean not null default 0,
|
||||
last_scraped_at not null default 0,
|
||||
|
||||
foreign key(type) references notification_types(rowid)
|
||||
foreign key(user_id) references users(id)
|
||||
);
|
||||
|
@ -410,6 +410,8 @@ func create_dummy_notification() Notification {
|
||||
ActionUserID: create_stable_user().ID,
|
||||
ActionTweetID: create_stable_tweet().ID,
|
||||
ActionRetweetID: create_stable_retweet().RetweetID,
|
||||
HasDetail: true,
|
||||
LastScrapedAt: TimestampFromUnix(57234728),
|
||||
TweetIDs: []TweetID{create_stable_tweet().ID},
|
||||
UserIDs: []UserID{create_stable_user().ID},
|
||||
RetweetIDs: []TweetID{create_stable_retweet().RetweetID},
|
||||
|
@ -333,6 +333,9 @@ var MIGRATIONS = []string{
|
||||
action_tweet_id integer references tweets(id), -- tweet associated with the notification
|
||||
action_retweet_id integer references retweets(retweet_id),
|
||||
|
||||
has_detail boolean not null default 0,
|
||||
last_scraped_at not null default 0,
|
||||
|
||||
foreign key(type) references notification_types(rowid)
|
||||
foreign key(user_id) references users(id)
|
||||
);
|
||||
|
@ -2,10 +2,12 @@ package scraper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -56,6 +58,41 @@ func (api *API) GetNotifications(how_many int) (TweetTrove, error) {
|
||||
}
|
||||
trove.MergeWith(new_trove)
|
||||
}
|
||||
|
||||
return trove, nil
|
||||
}
|
||||
|
||||
func (api *API) GetNotificationDetailForAll(trove TweetTrove, to_scrape []NotificationID) (TweetTrove, error) {
|
||||
for _, n_id := range to_scrape {
|
||||
notification := trove.Notifications[n_id]
|
||||
resp, err := api.GetNotificationDetail(notification)
|
||||
if errors.Is(err, ErrRateLimited) {
|
||||
log.Warnf("Rate limited!")
|
||||
break
|
||||
} else if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
|
||||
// Fetch the notification detail
|
||||
new_trove, ids, err := resp.ToTweetTroveAsNotificationDetail()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
trove.MergeWith(new_trove)
|
||||
|
||||
// Add the fetched Tweet / Retweet IDs to the notification
|
||||
for _, id := range ids {
|
||||
_, is_retweet := trove.Retweets[id]
|
||||
if is_retweet {
|
||||
notification.RetweetIDs = append(notification.RetweetIDs, id)
|
||||
} else {
|
||||
notification.TweetIDs = append(notification.TweetIDs, id)
|
||||
}
|
||||
}
|
||||
// Update the notification's last_scraped_at
|
||||
notification.LastScrapedAt = Timestamp{time.Now()}
|
||||
trove.Notifications[n_id] = notification
|
||||
}
|
||||
return trove, nil
|
||||
}
|
||||
|
||||
@ -91,6 +128,17 @@ func (t *TweetResponse) ToTweetTroveAsNotifications(current_user_id UserID) (Twe
|
||||
notification.Type = NOTIFICATION_TYPE_QUOTE_TWEET
|
||||
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "mentioned") {
|
||||
notification.Type = NOTIFICATION_TYPE_MENTION
|
||||
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "live_broadcast") {
|
||||
// TODO: broadcast
|
||||
notification.Type = NOTIFICATION_TYPE_USER_IS_LIVE
|
||||
} else if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "community_tweet_pinned") {
|
||||
// TODO: communities
|
||||
delete(ret.Notifications, notification.ID)
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.Contains(entry.Content.Item.ClientEventInfo.Element, "multiple") {
|
||||
notification.HasDetail = true
|
||||
}
|
||||
|
||||
if entry.Content.Item.Content.Tweet.ID != 0 {
|
||||
@ -161,3 +209,39 @@ func ParseSingleNotification(n APINotification) Notification {
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (api *API) GetNotificationDetail(n Notification) (TweetResponse, error) {
|
||||
url, err := url.Parse(fmt.Sprintf("https://twitter.com/i/api/2/notifications/view/%s.json", n.ID))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
query := url.Query()
|
||||
add_tweet_query_params(&query)
|
||||
url.RawQuery = query.Encode()
|
||||
|
||||
var result TweetResponse
|
||||
err = api.do_http(url.String(), "", &result)
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
func (t *TweetResponse) ToTweetTroveAsNotificationDetail() (TweetTrove, []TweetID, error) {
|
||||
ids := []TweetID{}
|
||||
ret, err := t.ToTweetTrove()
|
||||
if err != nil {
|
||||
return TweetTrove{}, ids, err
|
||||
}
|
||||
|
||||
// Find the "addEntries" instruction
|
||||
for _, instr := range t.Timeline.Instructions {
|
||||
sort.Sort(instr.AddEntries.Entries)
|
||||
for _, entry := range instr.AddEntries.Entries {
|
||||
if entry.Content.Item.Content.Tweet.ID != 0 {
|
||||
ids = append(ids, TweetID(entry.Content.Item.Content.Tweet.ID))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret, ids, nil
|
||||
}
|
||||
|
@ -119,6 +119,21 @@ func TestParseNotificationsPage(t *testing.T) {
|
||||
assert.Len(notif10.RetweetIDs, 1)
|
||||
assert.Contains(notif10.RetweetIDs, TweetID(1827183097382654351))
|
||||
|
||||
notif11, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRHyMqRjxDY"]
|
||||
assert.True(is_ok)
|
||||
assert.Equal(NOTIFICATION_TYPE_USER_IS_LIVE, notif11.Type)
|
||||
assert.Equal(UserID(277536867), notif11.ActionUserID)
|
||||
|
||||
// 1 user liked multiple posts
|
||||
notif12, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRESfwtSqvg"]
|
||||
assert.True(is_ok)
|
||||
assert.True(notif12.HasDetail)
|
||||
|
||||
// TODO: communities
|
||||
// notif12, is_ok := tweet_trove.Notifications["FDzeDIfVUAIAAAABiJONco_yJRHPBNsDH88"]
|
||||
// assert.True(is_ok)
|
||||
// assert.Equal(NOTIFICATION_TYPE_COMMUNITY_PINNED_POST, notif12.Type)
|
||||
|
||||
// Check users
|
||||
for _, u_id := range []UserID{1458284524761075714, 28815778, 1633158398555353096} {
|
||||
_, is_ok := tweet_trove.Users[u_id]
|
||||
@ -155,3 +170,25 @@ func TestParseNotificationsEndOfFeed(t *testing.T) {
|
||||
|
||||
assert.True(resp.IsEndOfFeed())
|
||||
}
|
||||
|
||||
func TestParseNotificationDetail(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
require := require.New(t)
|
||||
data, err := os.ReadFile("test_responses/notifications/notification_detail.json")
|
||||
require.NoError(err)
|
||||
|
||||
var resp TweetResponse
|
||||
err = json.Unmarshal(data, &resp)
|
||||
require.NoError(err)
|
||||
|
||||
trove, ids, err := resp.ToTweetTroveAsNotificationDetail()
|
||||
require.NoError(err)
|
||||
assert.Len(ids, 2)
|
||||
assert.Contains(ids, TweetID(1827544032714633628))
|
||||
assert.Contains(ids, TweetID(1826743131108487390))
|
||||
|
||||
_, is_ok := trove.Tweets[1826743131108487390]
|
||||
assert.True(is_ok)
|
||||
_, is_ok = trove.Retweets[1827544032714633628]
|
||||
assert.True(is_ok)
|
||||
}
|
||||
|
@ -52,6 +52,10 @@ type Notification struct {
|
||||
ActionTweetID TweetID `db:"action_tweet_id"`
|
||||
ActionRetweetID TweetID `db:"action_retweet_id"`
|
||||
|
||||
// Used for "multiple" notifs, like "user liked multiple tweets"
|
||||
HasDetail bool `db:"has_detail"`
|
||||
LastScrapedAt Timestamp `db:"last_scraped_at"`
|
||||
|
||||
TweetIDs []TweetID
|
||||
UserIDs []UserID
|
||||
RetweetIDs []TweetID
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user