From 58c19df11a8d9571c8802c401d03876b739ba86e Mon Sep 17 00:00:00 2001 From: Alessio Date: Thu, 24 Nov 2022 19:08:00 -0500 Subject: [PATCH] Add Space persistence queries for Space details --- persistence/media_queries.go | 19 ------- persistence/media_queries_test.go | 19 ------- persistence/schema.sql | 24 ++++++++- persistence/space_queries.go | 85 ++++++++++++++++++++++++++++++ persistence/space_queries_test.go | 62 ++++++++++++++++++++++ persistence/tweet_queries.go | 13 +++-- persistence/tweet_trove_queries.go | 16 ++++-- persistence/utils_test.go | 19 +++++-- 8 files changed, 204 insertions(+), 53 deletions(-) create mode 100644 persistence/space_queries.go create mode 100644 persistence/space_queries_test.go diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 38ef346..5b42fe9 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -102,17 +102,6 @@ func (p Profile) SavePoll(poll scraper.Poll) error { return nil } -/** - * Save a Space - */ -func (p Profile) SaveSpace(space scraper.Space) error { - _, err := p.DB.NamedExec(`insert into spaces (id, short_url) values (:id, :short_url) on conflict do nothing`, space) - if err != nil { - return fmt.Errorf("Error saving Space (ID %s):\n %w", space.ID, err) - } - return nil -} - /** * Get the list of images for a tweet */ @@ -162,11 +151,3 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er `, t.ID) return } - -/** - * Get a Space by ID - */ -func (p Profile) GetSpace(id scraper.SpaceID) (space scraper.Space, err error) { - err = p.DB.Get(&space, `select id, short_url from spaces where id = ?`, id) - return -} diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go index 703d0a6..3cfba4a 100644 --- a/persistence/media_queries_test.go +++ b/persistence/media_queries_test.go @@ -277,22 +277,3 @@ func TestModifyPoll(t *testing.T) { t.Error(diff) } } - -/** - * Create a Space, save it, reload it, and make sure it comes back the same - */ -func TestSaveAndLoadSpace(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) - - space := create_space_from_id(rand.Int()) - err := profile.SaveSpace(space) - require.NoError(err) - - new_space, err := profile.GetSpace(space.ID) - require.NoError(err) - if diff := deep.Equal(space, new_space); diff != nil { - t.Error(diff) - } -} diff --git a/persistence/schema.sql b/persistence/schema.sql index 70b8044..3acd97d 100644 --- a/persistence/schema.sql +++ b/persistence/schema.sql @@ -116,7 +116,29 @@ create table polls (rowid integer primary key, create table spaces(rowid integer primary key, id text unique not null, - short_url text not null + created_by_id integer, + short_url text not null, + state text not null, + title text not null, + created_at integer not null, + started_at integer not null, + ended_at integer not null, + updated_at integer not null, + is_available_for_replay boolean not null, + replay_watch_count integer, + live_listeners_count integer, + is_details_fetched boolean not null default 0, + + foreign key(created_by_id) references users(id) +); + +create table space_participants(rowid integer primary key, + user_id integer not null, + space_id not null, + + foreign key(space_id) references spaces(id) + -- No foreign key for users, since they may not be downloaded yet and I don't want to + -- download every user who joins a space ); create table images (rowid integer primary key, diff --git a/persistence/space_queries.go b/persistence/space_queries.go new file mode 100644 index 0000000..55d65e0 --- /dev/null +++ b/persistence/space_queries.go @@ -0,0 +1,85 @@ +package persistence + +import ( + "database/sql" + "errors" + "fmt" + "offline_twitter/scraper" +) + +type SpaceParticipant struct { + UserID scraper.UserID `db:"user_id"` + SpaceID scraper.SpaceID `db:"space_id"` +} + +/** + * Save a Space + */ +func (p Profile) SaveSpace(s scraper.Space) error { + _, err := p.DB.NamedExec(` + insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, + is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched) + values (:id, nullif(:created_by_id, 0), :short_url, :state, :title, :created_at, :started_at, :ended_at, :updated_at, + :is_available_for_replay, :replay_watch_count, :live_listeners_count, :is_details_fetched) + on conflict do update + set id=:id, + created_by_id=case when created_by_id is not null then created_by_id else nullif(:created_by_id, 0) end, + short_url=case when short_url == "" then :short_url else short_url end, + state=:state, + title=:title, + updated_at=:updated_at, + is_available_for_replay=:is_available_for_replay, + replay_watch_count=:replay_watch_count, + live_listeners_count=:live_listeners_count, + is_details_fetched=:is_details_fetched + `, &s) + if err != nil { + return fmt.Errorf("Error saving space (space ID %q, value: %#v):\n %w", s.ID, s, err) + } + + space_participants := []SpaceParticipant{} + for _, participant_id := range s.ParticipantIds { + space_participants = append(space_participants, SpaceParticipant{UserID: participant_id, SpaceID: s.ID}) + } + if len(space_participants) > 0 { + _, err = p.DB.NamedExec(` + insert into space_participants (user_id, space_id) values (:user_id, :space_id) + `, space_participants) + if err != nil { + return fmt.Errorf("Error saving participants (space ID %q, participants: %#v):\n %w", s.ID, space_participants, err) + } + } + return nil +} + +/** + * Get a Space by ID + */ +func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) { + err = p.DB.Get(&space, + `select id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, is_available_for_replay, + replay_watch_count, live_listeners_count, is_details_fetched + from spaces + where id = ?`, id) + if err != nil { + return + } + space.ParticipantIds = []scraper.UserID{} + rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id) + if errors.Is(err, sql.ErrNoRows) { + err = nil + } + if err != nil { + panic(err) + } + var participant_id scraper.UserID + for rows.Next() { + err = rows.Scan(&participant_id) + if err != nil { + panic(err) + } + space.ParticipantIds = append(space.ParticipantIds, participant_id) + } + + return +} diff --git a/persistence/space_queries_test.go b/persistence/space_queries_test.go new file mode 100644 index 0000000..a9037a5 --- /dev/null +++ b/persistence/space_queries_test.go @@ -0,0 +1,62 @@ +package persistence_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "offline_twitter/scraper" + + "github.com/go-test/deep" + "math/rand" +) + +/** + * Create a Space, save it, reload it, and make sure it comes back the same + */ +func TestSaveAndLoadSpace(t *testing.T) { + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) + + space := create_space_from_id(rand.Int()) + err := profile.SaveSpace(space) + require.NoError(err) + + new_space, err := profile.GetSpaceById(space.ID) + require.NoError(err) + if diff := deep.Equal(space, new_space); diff != nil { + t.Error(diff) + } +} + +func TestNoWorseningSpace(t *testing.T) { + require := require.New(t) + assert := assert.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) + + space := create_space_from_id(rand.Int()) + space.ShortUrl = "Some Short Url" + space.CreatedAt = scraper.TimestampFromUnix(1000) + space.CreatedById = scraper.UserID(-1) + + // Save the space + err := profile.SaveSpace(space) + require.NoError(err) + + // Worsen the space, then re-save + space.ShortUrl = "" + space.CreatedAt = scraper.TimestampFromUnix(0) + space.CreatedById = scraper.UserID(0) + err = profile.SaveSpace(space) + require.NoError(err) + + // Reload it + new_space, err := profile.GetSpaceById(space.ID) + require.NoError(err) + + assert.Equal(new_space.ShortUrl, "Some Short Url") + assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000)) + assert.Equal(new_space.CreatedById, scraper.UserID(-1)) +} diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go index a731b14..c1eae53 100644 --- a/persistence/tweet_queries.go +++ b/persistence/tweet_queries.go @@ -14,13 +14,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { tx := db.MustBegin() - var space_id scraper.SpaceID + // Has to be done first since Tweet has a foreign key to Space for _, space := range t.Spaces { err := p.SaveSpace(space) if err != nil { return err } - space_id = space.ID } _, err := db.Exec(` @@ -54,7 +53,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { `, t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), - strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, + strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, + t.LastScrapedAt, t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, @@ -136,11 +136,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { var mentions string var reply_mentions string var hashtags string - var space_id scraper.SpaceID row := stmt.QueryRow(id) err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, - &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, + &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, &t.IsConversationScraped, &t.LastScrapedAt) if err != nil { return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err) @@ -166,8 +165,8 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { } t.Spaces = []scraper.Space{} - if space_id != "" { - space, err := p.GetSpace(space_id) + if t.SpaceID != "" { + space, err := p.GetSpaceById(t.SpaceID) if err != nil { return t, err } diff --git a/persistence/tweet_trove_queries.go b/persistence/tweet_trove_queries.go index e89993b..fae09bc 100644 --- a/persistence/tweet_trove_queries.go +++ b/persistence/tweet_trove_queries.go @@ -31,6 +31,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) { trove.Retweets[j] = retweet } } + for j, space := range trove.Spaces { + if space.CreatedById == trove.Users[i].ID { + space.CreatedById = u.ID + trove.Spaces[j] = space + } + } trove.Users[i] = u // Download their tiny profile image @@ -40,10 +46,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) { } } - // TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now - // Update: Yes it's redundant. Places that return tweet troves should call `PostProcess` - // before returning, which includes `FillMissingUserIDs`. - // trove.FillMissingUserIDs() + for _, s := range trove.Spaces { + err := p.SaveSpace(s) + if err != nil { + panic(fmt.Errorf("Error saving space with ID %s:\n %w", s.ID, err)) + } + } for _, t := range trove.Tweets { err := p.SaveTweet(t) diff --git a/persistence/utils_test.go b/persistence/utils_test.go index 7c0463c..05fd4fc 100644 --- a/persistence/utils_test.go +++ b/persistence/utils_test.go @@ -174,6 +174,7 @@ func create_stable_tweet() scraper.Tweet { Spaces: []scraper.Space{ create_space_from_id(-1), }, + SpaceID: scraper.SpaceID("some_id_-1"), IsConversationScraped: true, LastScrapedAt: scraper.TimestampFromUnix(100000000), } @@ -241,6 +242,9 @@ func create_dummy_tweet() scraper.Tweet { poll := create_poll_from_id(rand.Int()) poll.TweetID = tweet_id + space := create_space_from_id(rand.Int()) + space_id := space.ID + return scraper.Tweet{ ID: tweet_id, UserID: -1, @@ -257,7 +261,8 @@ func create_dummy_tweet() scraper.Tweet { ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"}, Hashtags: []string{"hash1", "hash2"}, Polls: []scraper.Poll{poll}, - Spaces: []scraper.Space{create_space_from_id(rand.Int())}, + Spaces: []scraper.Space{space}, + SpaceID: space_id, } } @@ -300,7 +305,15 @@ func create_dummy_retweet(tweet_id scraper.TweetID) scraper.Retweet { */ func create_space_from_id(id int) scraper.Space { return scraper.Space{ - ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)), - ShortUrl: fmt.Sprintf("short_url_%d", id), + ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)), + ShortUrl: fmt.Sprintf("short_url_%d", id), + State: "Ended", + Title: "Some Title", + CreatedAt: scraper.TimestampFromUnix(1000), + StartedAt: scraper.TimestampFromUnix(2000), + EndedAt: scraper.TimestampFromUnix(3000), + UpdatedAt: scraper.TimestampFromUnix(4000), + CreatedById: -1, + ParticipantIds: []scraper.UserID{-1}, } }