Add Space persistence queries for Space details

This commit is contained in:
Alessio 2022-11-24 19:08:00 -05:00
parent a81d0e80fe
commit 58c19df11a
8 changed files with 204 additions and 53 deletions

View File

@ -102,17 +102,6 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
return nil return nil
} }
/**
* Save a Space
*/
func (p Profile) SaveSpace(space scraper.Space) error {
_, err := p.DB.NamedExec(`insert into spaces (id, short_url) values (:id, :short_url) on conflict do nothing`, space)
if err != nil {
return fmt.Errorf("Error saving Space (ID %s):\n %w", space.ID, err)
}
return nil
}
/** /**
* Get the list of images for a tweet * Get the list of images for a tweet
*/ */
@ -162,11 +151,3 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
`, t.ID) `, t.ID)
return return
} }
/**
* Get a Space by ID
*/
func (p Profile) GetSpace(id scraper.SpaceID) (space scraper.Space, err error) {
err = p.DB.Get(&space, `select id, short_url from spaces where id = ?`, id)
return
}

View File

@ -277,22 +277,3 @@ func TestModifyPoll(t *testing.T) {
t.Error(diff) t.Error(diff)
} }
} }
/**
* Create a Space, save it, reload it, and make sure it comes back the same
*/
func TestSaveAndLoadSpace(t *testing.T) {
require := require.New(t)
profile_path := "test_profiles/TestMediaQueries"
profile := create_or_load_profile(profile_path)
space := create_space_from_id(rand.Int())
err := profile.SaveSpace(space)
require.NoError(err)
new_space, err := profile.GetSpace(space.ID)
require.NoError(err)
if diff := deep.Equal(space, new_space); diff != nil {
t.Error(diff)
}
}

View File

@ -116,7 +116,29 @@ create table polls (rowid integer primary key,
create table spaces(rowid integer primary key, create table spaces(rowid integer primary key,
id text unique not null, id text unique not null,
short_url text not null created_by_id integer,
short_url text not null,
state text not null,
title text not null,
created_at integer not null,
started_at integer not null,
ended_at integer not null,
updated_at integer not null,
is_available_for_replay boolean not null,
replay_watch_count integer,
live_listeners_count integer,
is_details_fetched boolean not null default 0,
foreign key(created_by_id) references users(id)
);
create table space_participants(rowid integer primary key,
user_id integer not null,
space_id not null,
foreign key(space_id) references spaces(id)
-- No foreign key for users, since they may not be downloaded yet and I don't want to
-- download every user who joins a space
); );
create table images (rowid integer primary key, create table images (rowid integer primary key,

View File

@ -0,0 +1,85 @@
package persistence
import (
"database/sql"
"errors"
"fmt"
"offline_twitter/scraper"
)
// SpaceParticipant is a row in the `space_participants` join table, linking
// a user (by ID) to a Space they joined.  There is deliberately no foreign
// key to `users` in the schema, since participants may not be downloaded.
type SpaceParticipant struct {
	UserID  scraper.UserID  `db:"user_id"`
	SpaceID scraper.SpaceID `db:"space_id"`
}
/**
 * Save a Space.
 *
 * Upserts the `spaces` row.  On conflict, fields that could "worsen" are
 * guarded: `created_by_id` is only filled in when previously null, and
 * `short_url` is only overwritten when previously empty, so re-saving a
 * less-complete version of a Space never clobbers better data.
 *
 * The participant list is replaced wholesale (delete + insert) because
 * `space_participants` has no uniqueness constraint; blindly inserting on
 * every save would accumulate duplicate rows.
 */
func (p Profile) SaveSpace(s scraper.Space) error {
	_, err := p.DB.NamedExec(`
		insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
		                    is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
		values (:id, nullif(:created_by_id, 0), :short_url, :state, :title, :created_at, :started_at, :ended_at, :updated_at,
		        :is_available_for_replay, :replay_watch_count, :live_listeners_count, :is_details_fetched)
		    on conflict (id) do update  -- explicit conflict target: required by SQLite < 3.35 for DO UPDATE
		   set created_by_id=coalesce(created_by_id, nullif(:created_by_id, 0)),
		       short_url=case when short_url = '' then :short_url else short_url end,
		       state=:state,
		       title=:title,
		       updated_at=:updated_at,
		       is_available_for_replay=:is_available_for_replay,
		       replay_watch_count=:replay_watch_count,
		       live_listeners_count=:live_listeners_count,
		       is_details_fetched=:is_details_fetched
	`, &s)
	if err != nil {
		return fmt.Errorf("Error saving space (space ID %q, value: %#v):\n  %w", s.ID, s, err)
	}

	// Replace the participant list rather than appending, so that re-saving
	// the same Space is idempotent.
	_, err = p.DB.Exec(`delete from space_participants where space_id = ?`, s.ID)
	if err != nil {
		return fmt.Errorf("Error clearing participants (space ID %q):\n  %w", s.ID, err)
	}

	space_participants := make([]SpaceParticipant, 0, len(s.ParticipantIds))
	for _, participant_id := range s.ParticipantIds {
		space_participants = append(space_participants, SpaceParticipant{UserID: participant_id, SpaceID: s.ID})
	}
	if len(space_participants) > 0 {
		_, err = p.DB.NamedExec(`
			insert into space_participants (user_id, space_id) values (:user_id, :space_id)
		`, space_participants)
		if err != nil {
			return fmt.Errorf("Error saving participants (space ID %q, participants: %#v):\n  %w", s.ID, space_participants, err)
		}
	}
	return nil
}
/**
 * Get a Space by ID.
 *
 * Loads the `spaces` row plus its participant list.  All failures are
 * returned to the caller via `err` (no panics in library code), and the
 * participants cursor is always closed to avoid leaking a statement.
 */
func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
	err = p.DB.Get(&space,
		`select id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, is_available_for_replay,
		        replay_watch_count, live_listeners_count, is_details_fetched
		   from spaces
		  where id = ?`, id)
	if err != nil {
		return
	}

	// An empty result set is fine (a Space can have no recorded participants);
	// note that DB.Query never returns sql.ErrNoRows, only Get/QueryRow do.
	space.ParticipantIds = []scraper.UserID{}
	rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
	if err != nil {
		err = fmt.Errorf("Error getting participants (space ID %q):\n  %w", id, err)
		return
	}
	defer rows.Close() // Don't leak the cursor if a Scan fails mid-loop

	var participant_id scraper.UserID
	for rows.Next() {
		if err = rows.Scan(&participant_id); err != nil {
			return
		}
		space.ParticipantIds = append(space.ParticipantIds, participant_id)
	}
	err = rows.Err() // Surface iteration errors instead of silently truncating the list
	return
}

View File

@ -0,0 +1,62 @@
package persistence_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"offline_twitter/scraper"
"github.com/go-test/deep"
"math/rand"
)
/**
 * Round-trip test: a freshly created Space should survive a save/load cycle
 * through the database unchanged.
 */
func TestSaveAndLoadSpace(t *testing.T) {
	require := require.New(t)

	profile := create_or_load_profile("test_profiles/TestMediaQueries")

	original_space := create_space_from_id(rand.Int())
	require.NoError(profile.SaveSpace(original_space))

	reloaded_space, err := profile.GetSpaceById(original_space.ID)
	require.NoError(err)

	if diff := deep.Equal(original_space, reloaded_space); diff != nil {
		t.Error(diff)
	}
}
/**
 * Saving a "worse" (less complete) version of an already-saved Space must not
 * clobber the better data already in the database: short_url, created_at and
 * created_by_id should all survive.
 */
func TestNoWorseningSpace(t *testing.T) {
	require := require.New(t)
	assert := assert.New(t)

	profile_path := "test_profiles/TestMediaQueries"
	profile := create_or_load_profile(profile_path)

	space := create_space_from_id(rand.Int())
	space.ShortUrl = "Some Short Url"
	space.CreatedAt = scraper.TimestampFromUnix(1000)
	space.CreatedById = scraper.UserID(-1)

	// Save the space
	err := profile.SaveSpace(space)
	require.NoError(err)

	// Worsen the space, then re-save
	space.ShortUrl = ""
	space.CreatedAt = scraper.TimestampFromUnix(0)
	space.CreatedById = scraper.UserID(0)
	err = profile.SaveSpace(space)
	require.NoError(err)

	// Reload it
	new_space, err := profile.GetSpaceById(space.ID)
	require.NoError(err)

	// testify's assert.Equal takes (expected, actual) — keep that order so
	// failure messages label the values correctly.
	assert.Equal("Some Short Url", new_space.ShortUrl)
	assert.Equal(scraper.TimestampFromUnix(1000), new_space.CreatedAt)
	assert.Equal(scraper.UserID(-1), new_space.CreatedById)
}

View File

@ -14,13 +14,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
tx := db.MustBegin() tx := db.MustBegin()
var space_id scraper.SpaceID // Has to be done first since Tweet has a foreign key to Space
for _, space := range t.Spaces { for _, space := range t.Spaces {
err := p.SaveSpace(space) err := p.SaveSpace(space)
if err != nil { if err != nil {
return err return err
} }
space_id = space.ID
} }
_, err := db.Exec(` _, err := db.Exec(`
@ -54,7 +53,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
`, `,
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
t.LastScrapedAt,
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType, t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
@ -136,11 +136,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
var mentions string var mentions string
var reply_mentions string var reply_mentions string
var hashtags string var hashtags string
var space_id scraper.SpaceID
row := stmt.QueryRow(id) row := stmt.QueryRow(id)
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
&t.IsConversationScraped, &t.LastScrapedAt) &t.IsConversationScraped, &t.LastScrapedAt)
if err != nil { if err != nil {
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err) return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
@ -166,8 +165,8 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
} }
t.Spaces = []scraper.Space{} t.Spaces = []scraper.Space{}
if space_id != "" { if t.SpaceID != "" {
space, err := p.GetSpace(space_id) space, err := p.GetSpaceById(t.SpaceID)
if err != nil { if err != nil {
return t, err return t, err
} }

View File

@ -31,6 +31,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
trove.Retweets[j] = retweet trove.Retweets[j] = retweet
} }
} }
for j, space := range trove.Spaces {
if space.CreatedById == trove.Users[i].ID {
space.CreatedById = u.ID
trove.Spaces[j] = space
}
}
trove.Users[i] = u trove.Users[i] = u
// Download their tiny profile image // Download their tiny profile image
@ -40,10 +46,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
} }
} }
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now for _, s := range trove.Spaces {
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess` err := p.SaveSpace(s)
// before returning, which includes `FillMissingUserIDs`. if err != nil {
// trove.FillMissingUserIDs() panic(fmt.Errorf("Error saving space with ID %s:\n %w", s.ID, err))
}
}
for _, t := range trove.Tweets { for _, t := range trove.Tweets {
err := p.SaveTweet(t) err := p.SaveTweet(t)

View File

@ -174,6 +174,7 @@ func create_stable_tweet() scraper.Tweet {
Spaces: []scraper.Space{ Spaces: []scraper.Space{
create_space_from_id(-1), create_space_from_id(-1),
}, },
SpaceID: scraper.SpaceID("some_id_-1"),
IsConversationScraped: true, IsConversationScraped: true,
LastScrapedAt: scraper.TimestampFromUnix(100000000), LastScrapedAt: scraper.TimestampFromUnix(100000000),
} }
@ -241,6 +242,9 @@ func create_dummy_tweet() scraper.Tweet {
poll := create_poll_from_id(rand.Int()) poll := create_poll_from_id(rand.Int())
poll.TweetID = tweet_id poll.TweetID = tweet_id
space := create_space_from_id(rand.Int())
space_id := space.ID
return scraper.Tweet{ return scraper.Tweet{
ID: tweet_id, ID: tweet_id,
UserID: -1, UserID: -1,
@ -257,7 +261,8 @@ func create_dummy_tweet() scraper.Tweet {
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"}, ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
Hashtags: []string{"hash1", "hash2"}, Hashtags: []string{"hash1", "hash2"},
Polls: []scraper.Poll{poll}, Polls: []scraper.Poll{poll},
Spaces: []scraper.Space{create_space_from_id(rand.Int())}, Spaces: []scraper.Space{space},
SpaceID: space_id,
} }
} }
@ -302,5 +307,13 @@ func create_space_from_id(id int) scraper.Space {
return scraper.Space{ return scraper.Space{
ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)), ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)),
ShortUrl: fmt.Sprintf("short_url_%d", id), ShortUrl: fmt.Sprintf("short_url_%d", id),
State: "Ended",
Title: "Some Title",
CreatedAt: scraper.TimestampFromUnix(1000),
StartedAt: scraper.TimestampFromUnix(2000),
EndedAt: scraper.TimestampFromUnix(3000),
UpdatedAt: scraper.TimestampFromUnix(4000),
CreatedById: -1,
ParticipantIds: []scraper.UserID{-1},
} }
} }