offline-twitter/persistence/tweet_queries.go
2022-05-14 16:43:39 -07:00

235 lines
6.4 KiB
Go

package persistence
import (
"database/sql"
"errors"
"fmt"
"strings"
"offline_twitter/scraper"
)
func (p Profile) SaveTweet(t scraper.Tweet) error {
db := p.DB
tx := db.MustBegin()
var space_id scraper.SpaceID
for _, space := range t.Spaces {
err := p.SaveSpace(space)
if err != nil {
return err
}
space_id = space.ID
}
_, err := db.Exec(`
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id,
quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_stub, is_content_downloaded,
is_conversation_scraped, last_scraped_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, nullif(?, ''), (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
on conflict do update
set text=(case
when is_stub then
?
else
text
end
),
num_likes=?,
num_retweets=?,
num_replies=?,
num_quote_tweets=?,
is_stub=(is_stub and ?),
tombstone_type=(case
when ?='unavailable' and tombstone_type not in (0, 4) then
tombstone_type
else
(select rowid from tombstone_types where short_name=?)
end
),
is_content_downloaded=(is_content_downloaded or ?),
is_conversation_scraped=(is_conversation_scraped or ?),
last_scraped_at=max(last_scraped_at, ?)
`,
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), space_id, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
)
if err != nil {
return fmt.Errorf("Error executing SaveTweet(ID %d):\n %w", t.ID, err)
}
for _, url := range t.Urls {
err := p.SaveUrl(url)
if err != nil {
return err
}
}
for _, image := range t.Images {
err := p.SaveImage(image)
if err != nil {
return err
}
}
for _, video := range t.Videos {
err := p.SaveVideo(video)
if err != nil {
return err
}
}
for _, hashtag := range t.Hashtags {
_, err := db.Exec("insert into hashtags (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, hashtag)
if err != nil {
return fmt.Errorf("Error inserting hashtag %q on tweet ID %d:\n %w", hashtag, t.ID, err)
}
}
for _, poll := range t.Polls {
err := p.SavePoll(poll)
if err != nil {
return err
}
}
err = tx.Commit()
if err != nil {
return fmt.Errorf("Error committing SaveTweet transaction:\n %w", err)
}
return nil
}
func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool {
db := p.DB
var dummy string
err := db.QueryRow("select 1 from tweets where id = ?", id).Scan(&dummy)
if err != nil {
if !errors.Is(err, sql.ErrNoRows) {
// A real error
panic(err)
}
return false
}
return true
}
func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
db := p.DB
stmt, err := db.Prepare(`
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
mentions, reply_mentions, hashtags, ifnull(space_id, ''), ifnull(tombstone_types.short_name, ""), is_stub,
is_content_downloaded, is_conversation_scraped, last_scraped_at
from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
where id = ?
`)
if err != nil {
return scraper.Tweet{}, fmt.Errorf("Error preparing statement in GetTweetByID(%d):\n %w", id, err)
}
defer stmt.Close()
var t scraper.Tweet
var mentions string
var reply_mentions string
var hashtags string
var space_id scraper.SpaceID
row := stmt.QueryRow(id)
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &space_id, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
&t.IsConversationScraped, &t.LastScrapedAt)
if err != nil {
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
}
t.Mentions = []scraper.UserHandle{}
for _, m := range strings.Split(mentions, ",") {
if m != "" {
t.Mentions = append(t.Mentions, scraper.UserHandle(m))
}
}
t.ReplyMentions = []scraper.UserHandle{}
for _, m := range strings.Split(reply_mentions, ",") {
if m != "" {
t.ReplyMentions = append(t.ReplyMentions, scraper.UserHandle(m))
}
}
t.Hashtags = []string{}
for _, h := range strings.Split(hashtags, ",") {
if h != "" {
t.Hashtags = append(t.Hashtags, h)
}
}
t.Spaces = []scraper.Space{}
if space_id != "" {
space, err := p.GetSpace(space_id)
if err != nil {
return t, err
}
t.Spaces = append(t.Spaces, space)
}
imgs, err := p.GetImagesForTweet(t)
if err != nil {
return t, err
}
t.Images = imgs
vids, err := p.GetVideosForTweet(t)
if err != nil {
return t, err
}
t.Videos = vids
polls, err := p.GetPollsForTweet(t)
if err != nil {
return t, err
}
t.Polls = polls
urls, err := p.GetUrlsForTweet(t)
t.Urls = urls
return t, err
}
/**
* Populate the `User` field on a tweet with an actual User
*/
func (p Profile) LoadUserFor(t *scraper.Tweet) error {
if t.User != nil {
// Already there, no need to load it
return nil
}
user, err := p.GetUserByID(t.UserID)
if err != nil {
return err
}
t.User = &user
return nil
}
/**
* Return `false` if the tweet is in the DB and has had its content downloaded, `false` otherwise
*/
func (p Profile) CheckTweetContentDownloadNeeded(tweet scraper.Tweet) bool {
row := p.DB.QueryRow(`select is_content_downloaded from tweets where id = ?`, tweet.ID)
var is_content_downloaded bool
err := row.Scan(&is_content_downloaded)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return true
} else {
panic(err)
}
}
return !is_content_downloaded
}