Add Space persistence queries for Space details

This commit is contained in:
Alessio 2022-11-24 19:08:00 -05:00
parent a81d0e80fe
commit 58c19df11a
8 changed files with 204 additions and 53 deletions

View File

@ -102,17 +102,6 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
return nil return nil
} }
/**
* Save a Space
*/
func (p Profile) SaveSpace(space scraper.Space) error {
_, err := p.DB.NamedExec(`insert into spaces (id, short_url) values (:id, :short_url) on conflict do nothing`, space)
if err != nil {
return fmt.Errorf("Error saving Space (ID %s):\n %w", space.ID, err)
}
return nil
}
/** /**
* Get the list of images for a tweet * Get the list of images for a tweet
*/ */
@ -162,11 +151,3 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
`, t.ID) `, t.ID)
return return
} }
/**
* Get a Space by ID
*/
func (p Profile) GetSpace(id scraper.SpaceID) (space scraper.Space, err error) {
err = p.DB.Get(&space, `select id, short_url from spaces where id = ?`, id)
return
}

View File

@ -277,22 +277,3 @@ func TestModifyPoll(t *testing.T) {
t.Error(diff) t.Error(diff)
} }
} }
/**
* Create a Space, save it, reload it, and make sure it comes back the same
*/
func TestSaveAndLoadSpace(t *testing.T) {
require := require.New(t)
profile_path := "test_profiles/TestMediaQueries"
profile := create_or_load_profile(profile_path)
space := create_space_from_id(rand.Int())
err := profile.SaveSpace(space)
require.NoError(err)
new_space, err := profile.GetSpace(space.ID)
require.NoError(err)
if diff := deep.Equal(space, new_space); diff != nil {
t.Error(diff)
}
}

View File

@ -116,7 +116,29 @@ create table polls (rowid integer primary key,
create table spaces(rowid integer primary key, create table spaces(rowid integer primary key,
id text unique not null, id text unique not null,
short_url text not null created_by_id integer,
short_url text not null,
state text not null,
title text not null,
created_at integer not null,
started_at integer not null,
ended_at integer not null,
updated_at integer not null,
is_available_for_replay boolean not null,
replay_watch_count integer,
live_listeners_count integer,
is_details_fetched boolean not null default 0,
foreign key(created_by_id) references users(id)
);
create table space_participants(rowid integer primary key,
user_id integer not null,
space_id not null,
foreign key(space_id) references spaces(id)
-- No foreign key for users, since they may not be downloaded yet and I don't want to
-- download every user who joins a space
); );
create table images (rowid integer primary key, create table images (rowid integer primary key,

View File

@ -0,0 +1,85 @@
package persistence
import (
"database/sql"
"errors"
"fmt"
"offline_twitter/scraper"
)
// SpaceParticipant is a row in the `space_participants` join table, linking
// a user (by ID) to a Space they joined.  There is deliberately no foreign
// key to `users` in the schema, since participants may not be downloaded.
type SpaceParticipant struct {
	UserID  scraper.UserID  `db:"user_id"`
	SpaceID scraper.SpaceID `db:"space_id"`
}
/**
 * Save a Space.
 *
 * Upserts the `spaces` row.  On conflict, fields that could "worsen" are
 * guarded: `created_by_id` is only filled in when previously null, and
 * `short_url` is only overwritten when previously empty, so re-saving a
 * less-complete version of a Space never clobbers better data.
 *
 * The participant list is replaced wholesale (delete + insert) because
 * `space_participants` has no uniqueness constraint; blindly inserting on
 * every save would accumulate duplicate rows.
 */
func (p Profile) SaveSpace(s scraper.Space) error {
	_, err := p.DB.NamedExec(`
		insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
		                    is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
		values (:id, nullif(:created_by_id, 0), :short_url, :state, :title, :created_at, :started_at, :ended_at, :updated_at,
		        :is_available_for_replay, :replay_watch_count, :live_listeners_count, :is_details_fetched)
		    on conflict (id) do update  -- explicit conflict target: required by SQLite < 3.35 for DO UPDATE
		   set created_by_id=coalesce(created_by_id, nullif(:created_by_id, 0)),
		       short_url=case when short_url = '' then :short_url else short_url end,
		       state=:state,
		       title=:title,
		       updated_at=:updated_at,
		       is_available_for_replay=:is_available_for_replay,
		       replay_watch_count=:replay_watch_count,
		       live_listeners_count=:live_listeners_count,
		       is_details_fetched=:is_details_fetched
	`, &s)
	if err != nil {
		return fmt.Errorf("Error saving space (space ID %q, value: %#v):\n  %w", s.ID, s, err)
	}

	// Replace the participant list rather than appending, so that re-saving
	// the same Space is idempotent.
	_, err = p.DB.Exec(`delete from space_participants where space_id = ?`, s.ID)
	if err != nil {
		return fmt.Errorf("Error clearing participants (space ID %q):\n  %w", s.ID, err)
	}

	space_participants := make([]SpaceParticipant, 0, len(s.ParticipantIds))
	for _, participant_id := range s.ParticipantIds {
		space_participants = append(space_participants, SpaceParticipant{UserID: participant_id, SpaceID: s.ID})
	}
	if len(space_participants) > 0 {
		_, err = p.DB.NamedExec(`
			insert into space_participants (user_id, space_id) values (:user_id, :space_id)
		`, space_participants)
		if err != nil {
			return fmt.Errorf("Error saving participants (space ID %q, participants: %#v):\n  %w", s.ID, space_participants, err)
		}
	}
	return nil
}
/**
 * Get a Space by ID.
 *
 * Loads the `spaces` row plus its participant list.  All failures are
 * returned to the caller via `err` (no panics in library code), and the
 * participants cursor is always closed to avoid leaking a statement.
 */
func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
	err = p.DB.Get(&space,
		`select id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, is_available_for_replay,
		        replay_watch_count, live_listeners_count, is_details_fetched
		   from spaces
		  where id = ?`, id)
	if err != nil {
		return
	}

	// An empty result set is fine (a Space can have no recorded participants);
	// note that DB.Query never returns sql.ErrNoRows, only Get/QueryRow do.
	space.ParticipantIds = []scraper.UserID{}
	rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
	if err != nil {
		err = fmt.Errorf("Error getting participants (space ID %q):\n  %w", id, err)
		return
	}
	defer rows.Close() // Don't leak the cursor if a Scan fails mid-loop

	var participant_id scraper.UserID
	for rows.Next() {
		if err = rows.Scan(&participant_id); err != nil {
			return
		}
		space.ParticipantIds = append(space.ParticipantIds, participant_id)
	}
	err = rows.Err() // Surface iteration errors instead of silently truncating the list
	return
}

View File

@ -0,0 +1,62 @@
package persistence_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"offline_twitter/scraper"
"github.com/go-test/deep"
"math/rand"
)
/**
 * Round-trip test: a freshly created Space should survive a save/load cycle
 * through the database unchanged.
 */
func TestSaveAndLoadSpace(t *testing.T) {
	require := require.New(t)

	profile := create_or_load_profile("test_profiles/TestMediaQueries")

	original_space := create_space_from_id(rand.Int())
	require.NoError(profile.SaveSpace(original_space))

	reloaded_space, err := profile.GetSpaceById(original_space.ID)
	require.NoError(err)

	if diff := deep.Equal(original_space, reloaded_space); diff != nil {
		t.Error(diff)
	}
}
/**
 * Saving a "worse" (less complete) version of an already-saved Space must not
 * clobber the better data already in the database: short_url, created_at and
 * created_by_id should all survive.
 */
func TestNoWorseningSpace(t *testing.T) {
	require := require.New(t)
	assert := assert.New(t)

	profile_path := "test_profiles/TestMediaQueries"
	profile := create_or_load_profile(profile_path)

	space := create_space_from_id(rand.Int())
	space.ShortUrl = "Some Short Url"
	space.CreatedAt = scraper.TimestampFromUnix(1000)
	space.CreatedById = scraper.UserID(-1)

	// Save the space
	err := profile.SaveSpace(space)
	require.NoError(err)

	// Worsen the space, then re-save
	space.ShortUrl = ""
	space.CreatedAt = scraper.TimestampFromUnix(0)
	space.CreatedById = scraper.UserID(0)
	err = profile.SaveSpace(space)
	require.NoError(err)

	// Reload it
	new_space, err := profile.GetSpaceById(space.ID)
	require.NoError(err)

	// testify's assert.Equal takes (expected, actual) — keep that order so
	// failure messages label the values correctly.
	assert.Equal("Some Short Url", new_space.ShortUrl)
	assert.Equal(scraper.TimestampFromUnix(1000), new_space.CreatedAt)
	assert.Equal(scraper.UserID(-1), new_space.CreatedById)
}

View File

@ -14,13 +14,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
tx := db.MustBegin() tx := db.MustBegin()
var space_id scraper.SpaceID // Has to be done first since Tweet has a foreign key to Space
for _, space := range t.Spaces { for _, space := range t.Spaces {
err := p.SaveSpace(space) err := p.SaveSpace(space)
if err != nil { if err != nil {
return err return err
} }
space_id = space.ID
} }
_, err := db.Exec(` _, err := db.Exec(`
@ -54,7 +53,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
`, `,
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
t.LastScrapedAt,
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType, t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
@ -136,11 +136,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
var mentions string var mentions string
var reply_mentions string var reply_mentions string
var hashtags string var hashtags string
var space_id scraper.SpaceID
row := stmt.QueryRow(id) row := stmt.QueryRow(id)
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
&t.IsConversationScraped, &t.LastScrapedAt) &t.IsConversationScraped, &t.LastScrapedAt)
if err != nil { if err != nil {
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err) return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
@ -166,8 +165,8 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
} }
t.Spaces = []scraper.Space{} t.Spaces = []scraper.Space{}
if space_id != "" { if t.SpaceID != "" {
space, err := p.GetSpace(space_id) space, err := p.GetSpaceById(t.SpaceID)
if err != nil { if err != nil {
return t, err return t, err
} }

View File

@ -31,6 +31,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
trove.Retweets[j] = retweet trove.Retweets[j] = retweet
} }
} }
for j, space := range trove.Spaces {
if space.CreatedById == trove.Users[i].ID {
space.CreatedById = u.ID
trove.Spaces[j] = space
}
}
trove.Users[i] = u trove.Users[i] = u
// Download their tiny profile image // Download their tiny profile image
@ -40,10 +46,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
} }
} }
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now for _, s := range trove.Spaces {
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess` err := p.SaveSpace(s)
// before returning, which includes `FillMissingUserIDs`. if err != nil {
// trove.FillMissingUserIDs() panic(fmt.Errorf("Error saving space with ID %s:\n %w", s.ID, err))
}
}
for _, t := range trove.Tweets { for _, t := range trove.Tweets {
err := p.SaveTweet(t) err := p.SaveTweet(t)

View File

@ -174,6 +174,7 @@ func create_stable_tweet() scraper.Tweet {
Spaces: []scraper.Space{ Spaces: []scraper.Space{
create_space_from_id(-1), create_space_from_id(-1),
}, },
SpaceID: scraper.SpaceID("some_id_-1"),
IsConversationScraped: true, IsConversationScraped: true,
LastScrapedAt: scraper.TimestampFromUnix(100000000), LastScrapedAt: scraper.TimestampFromUnix(100000000),
} }
@ -241,6 +242,9 @@ func create_dummy_tweet() scraper.Tweet {
poll := create_poll_from_id(rand.Int()) poll := create_poll_from_id(rand.Int())
poll.TweetID = tweet_id poll.TweetID = tweet_id
space := create_space_from_id(rand.Int())
space_id := space.ID
return scraper.Tweet{ return scraper.Tweet{
ID: tweet_id, ID: tweet_id,
UserID: -1, UserID: -1,
@ -257,7 +261,8 @@ func create_dummy_tweet() scraper.Tweet {
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"}, ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
Hashtags: []string{"hash1", "hash2"}, Hashtags: []string{"hash1", "hash2"},
Polls: []scraper.Poll{poll}, Polls: []scraper.Poll{poll},
Spaces: []scraper.Space{create_space_from_id(rand.Int())}, Spaces: []scraper.Space{space},
SpaceID: space_id,
} }
} }
@ -302,5 +307,13 @@ func create_space_from_id(id int) scraper.Space {
return scraper.Space{ return scraper.Space{
ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)), ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)),
ShortUrl: fmt.Sprintf("short_url_%d", id), ShortUrl: fmt.Sprintf("short_url_%d", id),
State: "Ended",
Title: "Some Title",
CreatedAt: scraper.TimestampFromUnix(1000),
StartedAt: scraper.TimestampFromUnix(2000),
EndedAt: scraper.TimestampFromUnix(3000),
UpdatedAt: scraper.TimestampFromUnix(4000),
CreatedById: -1,
ParticipantIds: []scraper.UserID{-1},
} }
} }