Add Space persistence queries for Space details
This commit is contained in:
parent
a81d0e80fe
commit
58c19df11a
@ -102,17 +102,6 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Save a Space
|
|
||||||
*/
|
|
||||||
func (p Profile) SaveSpace(space scraper.Space) error {
|
|
||||||
_, err := p.DB.NamedExec(`insert into spaces (id, short_url) values (:id, :short_url) on conflict do nothing`, space)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("Error saving Space (ID %s):\n %w", space.ID, err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the list of images for a tweet
|
* Get the list of images for a tweet
|
||||||
*/
|
*/
|
||||||
@ -162,11 +151,3 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
|
|||||||
`, t.ID)
|
`, t.ID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a Space by ID
|
|
||||||
*/
|
|
||||||
func (p Profile) GetSpace(id scraper.SpaceID) (space scraper.Space, err error) {
|
|
||||||
err = p.DB.Get(&space, `select id, short_url from spaces where id = ?`, id)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
@ -277,22 +277,3 @@ func TestModifyPoll(t *testing.T) {
|
|||||||
t.Error(diff)
|
t.Error(diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a Space, save it, reload it, and make sure it comes back the same
|
|
||||||
*/
|
|
||||||
func TestSaveAndLoadSpace(t *testing.T) {
|
|
||||||
require := require.New(t)
|
|
||||||
profile_path := "test_profiles/TestMediaQueries"
|
|
||||||
profile := create_or_load_profile(profile_path)
|
|
||||||
|
|
||||||
space := create_space_from_id(rand.Int())
|
|
||||||
err := profile.SaveSpace(space)
|
|
||||||
require.NoError(err)
|
|
||||||
|
|
||||||
new_space, err := profile.GetSpace(space.ID)
|
|
||||||
require.NoError(err)
|
|
||||||
if diff := deep.Equal(space, new_space); diff != nil {
|
|
||||||
t.Error(diff)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -116,7 +116,29 @@ create table polls (rowid integer primary key,
|
|||||||
|
|
||||||
create table spaces(rowid integer primary key,
|
create table spaces(rowid integer primary key,
|
||||||
id text unique not null,
|
id text unique not null,
|
||||||
short_url text not null
|
created_by_id integer,
|
||||||
|
short_url text not null,
|
||||||
|
state text not null,
|
||||||
|
title text not null,
|
||||||
|
created_at integer not null,
|
||||||
|
started_at integer not null,
|
||||||
|
ended_at integer not null,
|
||||||
|
updated_at integer not null,
|
||||||
|
is_available_for_replay boolean not null,
|
||||||
|
replay_watch_count integer,
|
||||||
|
live_listeners_count integer,
|
||||||
|
is_details_fetched boolean not null default 0,
|
||||||
|
|
||||||
|
foreign key(created_by_id) references users(id)
|
||||||
|
);
|
||||||
|
|
||||||
|
create table space_participants(rowid integer primary key,
|
||||||
|
user_id integer not null,
|
||||||
|
space_id not null,
|
||||||
|
|
||||||
|
foreign key(space_id) references spaces(id)
|
||||||
|
-- No foreign key for users, since they may not be downloaded yet and I don't want to
|
||||||
|
-- download every user who joins a space
|
||||||
);
|
);
|
||||||
|
|
||||||
create table images (rowid integer primary key,
|
create table images (rowid integer primary key,
|
||||||
|
85
persistence/space_queries.go
Normal file
85
persistence/space_queries.go
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
package persistence
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"offline_twitter/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
type SpaceParticipant struct {
|
||||||
|
UserID scraper.UserID `db:"user_id"`
|
||||||
|
SpaceID scraper.SpaceID `db:"space_id"`
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save a Space
|
||||||
|
*/
|
||||||
|
func (p Profile) SaveSpace(s scraper.Space) error {
|
||||||
|
_, err := p.DB.NamedExec(`
|
||||||
|
insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
||||||
|
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
|
||||||
|
values (:id, nullif(:created_by_id, 0), :short_url, :state, :title, :created_at, :started_at, :ended_at, :updated_at,
|
||||||
|
:is_available_for_replay, :replay_watch_count, :live_listeners_count, :is_details_fetched)
|
||||||
|
on conflict do update
|
||||||
|
set id=:id,
|
||||||
|
created_by_id=case when created_by_id is not null then created_by_id else nullif(:created_by_id, 0) end,
|
||||||
|
short_url=case when short_url == "" then :short_url else short_url end,
|
||||||
|
state=:state,
|
||||||
|
title=:title,
|
||||||
|
updated_at=:updated_at,
|
||||||
|
is_available_for_replay=:is_available_for_replay,
|
||||||
|
replay_watch_count=:replay_watch_count,
|
||||||
|
live_listeners_count=:live_listeners_count,
|
||||||
|
is_details_fetched=:is_details_fetched
|
||||||
|
`, &s)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error saving space (space ID %q, value: %#v):\n %w", s.ID, s, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
space_participants := []SpaceParticipant{}
|
||||||
|
for _, participant_id := range s.ParticipantIds {
|
||||||
|
space_participants = append(space_participants, SpaceParticipant{UserID: participant_id, SpaceID: s.ID})
|
||||||
|
}
|
||||||
|
if len(space_participants) > 0 {
|
||||||
|
_, err = p.DB.NamedExec(`
|
||||||
|
insert into space_participants (user_id, space_id) values (:user_id, :space_id)
|
||||||
|
`, space_participants)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error saving participants (space ID %q, participants: %#v):\n %w", s.ID, space_participants, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a Space by ID
|
||||||
|
*/
|
||||||
|
func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
|
||||||
|
err = p.DB.Get(&space,
|
||||||
|
`select id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at, is_available_for_replay,
|
||||||
|
replay_watch_count, live_listeners_count, is_details_fetched
|
||||||
|
from spaces
|
||||||
|
where id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
space.ParticipantIds = []scraper.UserID{}
|
||||||
|
rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
var participant_id scraper.UserID
|
||||||
|
for rows.Next() {
|
||||||
|
err = rows.Scan(&participant_id)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
space.ParticipantIds = append(space.ParticipantIds, participant_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
62
persistence/space_queries_test.go
Normal file
62
persistence/space_queries_test.go
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package persistence_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"offline_twitter/scraper"
|
||||||
|
|
||||||
|
"github.com/go-test/deep"
|
||||||
|
"math/rand"
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a Space, save it, reload it, and make sure it comes back the same
|
||||||
|
*/
|
||||||
|
func TestSaveAndLoadSpace(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
profile_path := "test_profiles/TestMediaQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
space := create_space_from_id(rand.Int())
|
||||||
|
err := profile.SaveSpace(space)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
new_space, err := profile.GetSpaceById(space.ID)
|
||||||
|
require.NoError(err)
|
||||||
|
if diff := deep.Equal(space, new_space); diff != nil {
|
||||||
|
t.Error(diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNoWorseningSpace(t *testing.T) {
|
||||||
|
require := require.New(t)
|
||||||
|
assert := assert.New(t)
|
||||||
|
profile_path := "test_profiles/TestMediaQueries"
|
||||||
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
|
space := create_space_from_id(rand.Int())
|
||||||
|
space.ShortUrl = "Some Short Url"
|
||||||
|
space.CreatedAt = scraper.TimestampFromUnix(1000)
|
||||||
|
space.CreatedById = scraper.UserID(-1)
|
||||||
|
|
||||||
|
// Save the space
|
||||||
|
err := profile.SaveSpace(space)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Worsen the space, then re-save
|
||||||
|
space.ShortUrl = ""
|
||||||
|
space.CreatedAt = scraper.TimestampFromUnix(0)
|
||||||
|
space.CreatedById = scraper.UserID(0)
|
||||||
|
err = profile.SaveSpace(space)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
// Reload it
|
||||||
|
new_space, err := profile.GetSpaceById(space.ID)
|
||||||
|
require.NoError(err)
|
||||||
|
|
||||||
|
assert.Equal(new_space.ShortUrl, "Some Short Url")
|
||||||
|
assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000))
|
||||||
|
assert.Equal(new_space.CreatedById, scraper.UserID(-1))
|
||||||
|
}
|
@ -14,13 +14,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
|
|
||||||
tx := db.MustBegin()
|
tx := db.MustBegin()
|
||||||
|
|
||||||
var space_id scraper.SpaceID
|
// Has to be done first since Tweet has a foreign key to Space
|
||||||
for _, space := range t.Spaces {
|
for _, space := range t.Spaces {
|
||||||
err := p.SaveSpace(space)
|
err := p.SaveSpace(space)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
space_id = space.ID
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := db.Exec(`
|
_, err := db.Exec(`
|
||||||
@ -54,7 +53,8 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
|||||||
`,
|
`,
|
||||||
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
|
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
|
||||||
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
|
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
|
||||||
strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
|
||||||
|
t.LastScrapedAt,
|
||||||
|
|
||||||
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
|
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
|
||||||
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
|
||||||
@ -136,11 +136,10 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
var mentions string
|
var mentions string
|
||||||
var reply_mentions string
|
var reply_mentions string
|
||||||
var hashtags string
|
var hashtags string
|
||||||
var space_id scraper.SpaceID
|
|
||||||
|
|
||||||
row := stmt.QueryRow(id)
|
row := stmt.QueryRow(id)
|
||||||
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
|
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
|
||||||
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
|
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
|
||||||
&t.IsConversationScraped, &t.LastScrapedAt)
|
&t.IsConversationScraped, &t.LastScrapedAt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
|
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
|
||||||
@ -166,8 +165,8 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t.Spaces = []scraper.Space{}
|
t.Spaces = []scraper.Space{}
|
||||||
if space_id != "" {
|
if t.SpaceID != "" {
|
||||||
space, err := p.GetSpace(space_id)
|
space, err := p.GetSpaceById(t.SpaceID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return t, err
|
return t, err
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
|||||||
trove.Retweets[j] = retweet
|
trove.Retweets[j] = retweet
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for j, space := range trove.Spaces {
|
||||||
|
if space.CreatedById == trove.Users[i].ID {
|
||||||
|
space.CreatedById = u.ID
|
||||||
|
trove.Spaces[j] = space
|
||||||
|
}
|
||||||
|
}
|
||||||
trove.Users[i] = u
|
trove.Users[i] = u
|
||||||
|
|
||||||
// Download their tiny profile image
|
// Download their tiny profile image
|
||||||
@ -40,10 +46,12 @@ func (p Profile) SaveTweetTrove(trove TweetTrove) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this is called earlier in the process as well, before parsing. Is that call redundant? Too tired to figure out right now
|
for _, s := range trove.Spaces {
|
||||||
// Update: Yes it's redundant. Places that return tweet troves should call `PostProcess`
|
err := p.SaveSpace(s)
|
||||||
// before returning, which includes `FillMissingUserIDs`.
|
if err != nil {
|
||||||
// trove.FillMissingUserIDs()
|
panic(fmt.Errorf("Error saving space with ID %s:\n %w", s.ID, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, t := range trove.Tweets {
|
for _, t := range trove.Tweets {
|
||||||
err := p.SaveTweet(t)
|
err := p.SaveTweet(t)
|
||||||
|
@ -174,6 +174,7 @@ func create_stable_tweet() scraper.Tweet {
|
|||||||
Spaces: []scraper.Space{
|
Spaces: []scraper.Space{
|
||||||
create_space_from_id(-1),
|
create_space_from_id(-1),
|
||||||
},
|
},
|
||||||
|
SpaceID: scraper.SpaceID("some_id_-1"),
|
||||||
IsConversationScraped: true,
|
IsConversationScraped: true,
|
||||||
LastScrapedAt: scraper.TimestampFromUnix(100000000),
|
LastScrapedAt: scraper.TimestampFromUnix(100000000),
|
||||||
}
|
}
|
||||||
@ -241,6 +242,9 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
poll := create_poll_from_id(rand.Int())
|
poll := create_poll_from_id(rand.Int())
|
||||||
poll.TweetID = tweet_id
|
poll.TweetID = tweet_id
|
||||||
|
|
||||||
|
space := create_space_from_id(rand.Int())
|
||||||
|
space_id := space.ID
|
||||||
|
|
||||||
return scraper.Tweet{
|
return scraper.Tweet{
|
||||||
ID: tweet_id,
|
ID: tweet_id,
|
||||||
UserID: -1,
|
UserID: -1,
|
||||||
@ -257,7 +261,8 @@ func create_dummy_tweet() scraper.Tweet {
|
|||||||
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
|
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
|
||||||
Hashtags: []string{"hash1", "hash2"},
|
Hashtags: []string{"hash1", "hash2"},
|
||||||
Polls: []scraper.Poll{poll},
|
Polls: []scraper.Poll{poll},
|
||||||
Spaces: []scraper.Space{create_space_from_id(rand.Int())},
|
Spaces: []scraper.Space{space},
|
||||||
|
SpaceID: space_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,5 +307,13 @@ func create_space_from_id(id int) scraper.Space {
|
|||||||
return scraper.Space{
|
return scraper.Space{
|
||||||
ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)),
|
ID: scraper.SpaceID(fmt.Sprintf("some_id_%d", id)),
|
||||||
ShortUrl: fmt.Sprintf("short_url_%d", id),
|
ShortUrl: fmt.Sprintf("short_url_%d", id),
|
||||||
|
State: "Ended",
|
||||||
|
Title: "Some Title",
|
||||||
|
CreatedAt: scraper.TimestampFromUnix(1000),
|
||||||
|
StartedAt: scraper.TimestampFromUnix(2000),
|
||||||
|
EndedAt: scraper.TimestampFromUnix(3000),
|
||||||
|
UpdatedAt: scraper.TimestampFromUnix(4000),
|
||||||
|
CreatedById: -1,
|
||||||
|
ParticipantIds: []scraper.UserID{-1},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user