Add Space persistence queries for Space details
This commit is contained in:
parent
a81d0e80fe
commit
58c19df11a
@ -102,17 +102,6 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
/**
|
||||
* Save a Space
|
||||
*/
|
||||
func (p Profile) SaveSpace(space scraper.Space) error {
|
||||
_, err := p.DB.NamedExec(`insert into spaces (id, short_url) values (:id, :short_url) on conflict do nothing`, space)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error saving Space (ID %s):\n %w", space.ID, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of images for a tweet
|
||||
*/
|
||||
@ -162,11 +151,3 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
|
||||
`, t.ID)
|
||||
return
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a Space by ID
|
||||
*/
|
||||
func (p Profile) GetSpace(id scraper.SpaceID) (space scraper.Space, err error) {
|
||||
err = p.DB.Get(&space, `select id, short_url from spaces where id = ?`, id)
|
||||
return
|
||||
}
|
||||
|
@ -277,22 +277,3 @@ func TestModifyPoll(t *testing.T) {
|
||||
t.Error(diff)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Space, save it, reload it, and make sure it comes back the same
|
||||
*/
|
||||
func TestSaveAndLoadSpace(t *testing.T) {
|
||||
require := require.New(t)
|
||||
profile_path := "test_profiles/TestMediaQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
space := create_space_from_id(rand.Int())
|
||||
err := profile.SaveSpace(space)
|
||||
require.NoError(err)
|
||||
|
||||
new_space, err := profile.GetSpace(space.ID)
|
||||
require.NoError(err)
|
||||
if diff := deep.Equal(space, new_space); diff != nil {
|
||||
t.Error(diff)
|
||||
}
|
||||
}
|
||||
|
@ -116,7 +116,29 @@ create table polls (rowid integer primary key,
|
||||
|
||||
-- One row per Space.  `id` is the Space's own text ID; `rowid` is the internal primary key.
create table spaces(rowid integer primary key,
    id text unique not null,
    created_by_id integer,  -- nullable: a creator ID of 0 is stored as null when the Space is saved
    short_url text not null,
    state text not null,
    title text not null,
    created_at integer not null,
    started_at integer not null,
    ended_at integer not null,
    updated_at integer not null,
    is_available_for_replay boolean not null,
    replay_watch_count integer,
    live_listeners_count integer,
    is_details_fetched boolean not null default 0,

    foreign key(created_by_id) references users(id)
);
|
||||
|
||||
-- Links users to the Spaces they participated in (one row per user per Space).
create table space_participants(rowid integer primary key,
    user_id integer not null,
    space_id text not null,  -- same declared type as the `spaces.id` column it references

    foreign key(space_id) references spaces(id)
    -- No foreign key for users, since they may not be downloaded yet and I don't want to
    -- download every user who joins a space
);
|
||||
|
||||
create table images (rowid integer primary key,
|
||||
|
85
persistence/space_queries.go
Normal file
85
persistence/space_queries.go
Normal file
@ -0,0 +1,85 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
type SpaceParticipant struct {
|
||||
UserID scraper.UserID `db:"user_id"`
|
||||
SpaceID scraper.SpaceID `db:"space_id"`
|
||||
}
|
||||
|
||||
/**
|
||||
* Save a Space
|
||||
*/
|
||||
func (p Profile) SaveSpace(s scraper.Space) error {
|
||||
_, err := p.DB.NamedExec(`
|
||||
insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
||||
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
|
||||
values (:id, nullif(:created_by_id, 0), :short_url, :state, :title, :created_at, :started_at, :ended_at, :updated_at,
|
||||
:is_available_for_replay, :replay_watch_count, :live_listeners_count, :is_details_fetched)
|
||||
on conflict do update
|
||||
set id=:id,
|
||||
created_by_id=case when created_by_id is not null then created_by_id else nullif(:created_by_id, 0) end,
|
||||
short_url=case when short_url == "" then :short_url else short_url end,
|
||||
state=:state,
|
||||
title=:title,
|
||||
updated_at=:updated_at,
|
||||
is_available_for_replay=:is_available_for_replay,
|
||||
replay_watch_count=:replay_watch_count,
|
||||
live_listeners_count=:live_listeners_count,
|
||||
is_details_fetched=:is_details_fetched
|
||||
`, &s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error saving space (space ID %q, value: %#v):\n %w", s.ID, s, err)
|
||||
}
|
||||
|
||||
space_participants := []SpaceParticipant{}
|
||||
for _, participant_id := range s.ParticipantIds {
|
||||
space_participants = append(space_participants, SpaceParticipant{UserID: participant_id, SpaceID: s.ID})
|
||||
}
|
||||
if len(space_participants) > 0 {
|
||||
_, err = p.DB.NamedExec(`
|
||||
insert into space_participants (user_id, space_id) values (:user_id, :space_id)
|
||||
`, space_participants)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error saving participants (space ID %q, participants: %#v):\n %w", s.ID, space_participants, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a Space by ID
|
||||
*/
|
||||
func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
|
||||
err = p.DB.Get(&space,
|
||||
`select id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, is_available_for_replay,
|
||||
replay_watch_count, live_listeners_count, is_details_fetched
|
||||
from spaces
|
||||
where id = ?`, id)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
space.ParticipantIds = []scraper.UserID{}
|
||||
rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
err = nil
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var participant_id scraper.UserID
|
||||
for rows.Next() {
|
||||
err = rows.Scan(&participant_id)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
space.ParticipantIds = append(space.ParticipantIds, participant_id)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
62
persistence/space_queries_test.go
Normal file
62
persistence/space_queries_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package persistence_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"offline_twitter/scraper"
|
||||
|
||||
"github.com/go-test/deep"
|
||||
"math/rand"
|
||||
)
|
||||
|
||||
/**
|
||||
* Create a Space, save it, reload it, and make sure it comes back the same
|
||||
*/
|
||||
func TestSaveAndLoadSpace(t *testing.T) {
|
||||
require := require.New(t)
|
||||
profile_path := "test_profiles/TestMediaQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
space := create_space_from_id(rand.Int())
|
||||
err := profile.SaveSpace(space)
|
||||
require.NoError(err)
|
||||
|
||||
new_space, err := profile.GetSpaceById(space.ID)
|
||||
require.NoError(err)
|
||||
if diff := deep.Equal(space, new_space); diff != nil {
|
||||
t.Error(diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoWorseningSpace(t *testing.T) {
|
||||
require := require.New(t)
|
||||
assert := assert.New(t)
|
||||
profile_path := "test_profiles/TestMediaQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
space := create_space_from_id(rand.Int())
|
||||
space.ShortUrl = "Some Short Url"
|
||||
space.CreatedAt = scraper.TimestampFromUnix(1000)
|
||||
space.CreatedById = scraper.UserID(-1)
|
||||
|
||||
// Save the space
|
||||
err := profile.SaveSpace(space)
|
||||
require.NoError(err)
|
||||
|
||||
// Worsen the space, then re-save
|
||||
space.ShortUrl = ""
|
||||
space.CreatedAt = scraper.TimestampFromUnix(0)
|
||||
space.CreatedById = scraper.UserID(0)
|
||||
err = profile.SaveSpace(space)
|
||||
require.NoError(err)
|
||||
|
||||
// Reload it
|
||||
new_space, err := profile.GetSpaceById(space.ID)
|
||||
require.NoError(err)
|
||||
|
||||
assert.Equal(new_space.ShortUrl, "Some Short Url")
|
||||
assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000))
|
||||
assert.Equal(new_space.CreatedById, scraper.UserID(-1))
|
||||
}
|
@ -14,13 +14,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
||||
|
||||
tx := db.MustBegin()
|
||||
|
||||
var space_id scraper.SpaceID
|
||||
// Has to be done first since Tweet has a foreign key to Space
|
||||
for _, space := range t.Spaces {
|
||||
err := p.SaveSpace(space)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
space_id = space.ID
|
||||
}
|
||||
|
||||
_, err := db.Exec(`
|
||||
@ -54,7 +53,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
||||
`,
|
||||
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
|
||||
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
|
||||
strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
||||
strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
|
||||
t.LastScrapedAt,
|
||||
|
||||
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
|
||||
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
||||
@ -136,11 +136,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
||||
var mentions string
|
||||
var reply_mentions string
|
||||
var hashtags string
|
||||
var space_id scraper.SpaceID
|
||||
|
||||
row := stmt.QueryRow(id)
|
||||
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
|
||||
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
|
||||
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
|
||||
&t.IsConversationScraped, &t.LastScrapedAt)
|
||||
if err != nil {
|
||||
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
|
||||
@ -166,8 +165,8 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
||||
}
|
||||
|
||||
t.Spaces = []scraper.Space{}
|
||||
if space_id != "" {
|
||||
space, err := p.GetSpace(space_id)
|
||||
if t.SpaceID != "" {
|
||||
space, err := p.GetSpaceById(t.SpaceID)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
|
@ -31,6 +31,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
||||
trove.Retweets[j] = retweet
|
||||
}
|
||||
}
|
||||
for j, space := range trove.Spaces {
|
||||
if space.CreatedById == trove.Users[i].ID {
|
||||
space.CreatedById = u.ID
|
||||
trove.Spaces[j] = space
|
||||
}
|
||||
}
|
||||
trove.Users[i] = u
|
||||
|
||||
// Download their tiny profile image
|
||||
@ -40,10 +46,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now
|
||||
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess`
|
||||
// before returning, which includes `FillMissingUserIDs`.
|
||||
// trove.FillMissingUserIDs()
|
||||
for _, s := range trove.Spaces {
|
||||
err := p.SaveSpace(s)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Error saving space with ID %s:\n %w", s.ID, err))
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range trove.Tweets {
|
||||
err := p.SaveTweet(t)
|
||||
|
@ -174,6 +174,7 @@ func create_stable_tweet() scraper.Tweet {
|
||||
Spaces: []scraper.Space{
|
||||
create_space_from_id(-1),
|
||||
},
|
||||
SpaceID: scraper.SpaceID("some_id_-1"),
|
||||
IsConversationScraped: true,
|
||||
LastScrapedAt: scraper.TimestampFromUnix(100000000),
|
||||
}
|
||||
@ -241,6 +242,9 @@ func create_dummy_tweet() scraper.Tweet {
|
||||
poll := create_poll_from_id(rand.Int())
|
||||
poll.TweetID = tweet_id
|
||||
|
||||
space := create_space_from_id(rand.Int())
|
||||
space_id := space.ID
|
||||
|
||||
return scraper.Tweet{
|
||||
ID: tweet_id,
|
||||
UserID: -1,
|
||||
@ -257,7 +261,8 @@ func create_dummy_tweet() scraper.Tweet {
|
||||
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
|
||||
Hashtags: []string{"hash1", "hash2"},
|
||||
Polls: []scraper.Poll{poll},
|
||||
Spaces: []scraper.Space{create_space_from_id(rand.Int())},
|
||||
Spaces: []scraper.Space{space},
|
||||
SpaceID: space_id,
|
||||
}
|
||||
}
|
||||
|
||||
@ -302,5 +307,13 @@ func create_space_from_id(id int) scraper.Space {
|
||||
return scraper.Space{
|
||||
ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)),
|
||||
ShortUrl: fmt.Sprintf("short_url_%d", id),
|
||||
State: "Ended",
|
||||
Title: "Some Title",
|
||||
CreatedAt: scraper.TimestampFromUnix(1000),
|
||||
StartedAt: scraper.TimestampFromUnix(2000),
|
||||
EndedAt: scraper.TimestampFromUnix(3000),
|
||||
UpdatedAt: scraper.TimestampFromUnix(4000),
|
||||
CreatedById: -1,
|
||||
ParticipantIds: []scraper.UserID{-1},
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user