package persistence

import (
	"errors"
	"fmt"
	"strings"

	. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)

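// ErrEndOfFeed is returned by feed queries when no more items match, i.e. the
// caller has paginated past the oldest tweet.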
var (
	ErrEndOfFeed = errors.New("end of feed")
)

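// fill_content hydrates a TweetTrove in place: it fetches the tweets quoted by
// the trove's tweets, then the users, images, videos, urls and polls referenced
// by every tweet and retweet collected so far.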
func (p Profile) fill_content(trove *TweetTrove) {
	if len(trove.Tweets) == 0 {
		// Empty trove, nothing to fetch
		return
	}

	// Fetch quote-tweets
	// TODO: use recursive Common Table Expressions?
	quoted_ids := []interface{}{}
	for _, t := range trove.Tweets {
		if t.QuotedTweetID != 0 {
			quoted_ids = append(quoted_ids, t.QuotedTweetID)
		}
	}
	if len(quoted_ids) > 0 {
		var quoted_tweets []Tweet
		err := p.DB.Select(&quoted_tweets,
			`select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
			        mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id,
			        ifnull(tombstone_types.short_name, "") tombstone_type, is_expandable,
			        is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
			   from tweets
			   left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
			  where id in (`+strings.Repeat("?,", len(quoted_ids)-1)+`?)`, quoted_ids...)
		if err != nil {
			panic(err)
		}
		for _, t := range quoted_tweets {
			trove.Tweets[t.ID] = t
		}
	}
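
	// Build a "?,?,...,?" placeholder list covering every tweet in the trove,
	// and collect the IDs to bind against it: user IDs (tweet authors, plus
	// retweeters below) and tweet IDs.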
	in_clause := ""
	user_ids := []interface{}{}
	tweet_ids := []interface{}{}
	for _, t := range trove.Tweets {
		in_clause += "?,"
		user_ids = append(user_ids, int(t.UserID))
		tweet_ids = append(tweet_ids, t.ID)
	}
	in_clause = in_clause[:len(in_clause)-1]

	for _, r := range trove.Retweets {
		user_ids = append(user_ids, int(r.RetweetedByID))
	}

	// Get all the users
	if len(user_ids) > 0 { // It could be a search with no results, end of feed, etc.-- strings.Repeat will fail!
		var users []User
		userquery := `
			select id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified,
			       is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id,
			       is_content_downloaded, is_followed
			  from users
			 where id in (` + strings.Repeat("?,", len(user_ids)-1) + `?)`
		fmt.Printf("%s\n", userquery)
		err := p.DB.Select(&users, userquery, user_ids...)
		if err != nil {
			panic(err)
		}
		for _, u := range users {
			trove.Users[u.ID] = u
		}
	}
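
	// Note: map values are copies, so each attachment loop below re-fetches
	// the tweet, appends to it, and writes it back into trove.Tweets.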

	// Get all the Images
	var images []Image
	imgquery := `
		select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id in (` + in_clause + `)`
	fmt.Printf("%s\n", imgquery) // TODO: SQL logger
	err := p.DB.Select(&images, imgquery, tweet_ids...)
	if err != nil {
		panic(err)
	}
	for _, i := range images {
		t, is_ok := trove.Tweets[i.TweetID]
		if !is_ok {
			panic(i)
		}
		t.Images = append(t.Images, i)
		trove.Tweets[t.ID] = t
	}

	// Get all the Videos
	var videos []Video
	err = p.DB.Select(&videos, `
		select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration,
		       view_count, is_downloaded, is_blocked_by_dmca, is_gif
		  from videos
		 where tweet_id in (`+in_clause+`)`, tweet_ids...)
	if err != nil {
		panic(err)
	}
	for _, v := range videos {
		t, is_ok := trove.Tweets[v.TweetID]
		if !is_ok {
			panic(v)
		}
		t.Videos = append(t.Videos, v)
		trove.Tweets[t.ID] = t
	}

	// Get all the Urls
	var urls []Url
	err = p.DB.Select(&urls, `
		select tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
		       thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded
		  from urls
		 where tweet_id in (`+in_clause+`)`, tweet_ids...)
	if err != nil {
		panic(err)
	}
	for _, u := range urls {
		t, is_ok := trove.Tweets[u.TweetID]
		if !is_ok {
			panic(u)
		}
		t.Urls = append(t.Urls, u)
		trove.Tweets[t.ID] = t
	}

	// Get all the Polls
	var polls []Poll
	err = p.DB.Select(&polls, `
		select id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes,
		       voting_duration, voting_ends_at, last_scraped_at
		  from polls
		 where tweet_id in (`+in_clause+`)`, tweet_ids...)
	if err != nil {
		panic(err)
	}
	for _, poll := range polls {
		t, is_ok := trove.Tweets[poll.TweetID]
		if !is_ok {
			panic(poll)
		}
		t.Polls = append(t.Polls, poll)
		trove.Tweets[t.ID] = t
	}
}

// TODO: compound-query-structs
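
// A FeedItem pairs a tweet with the retweet (if any) that brought it into the
// feed; RetweetID is 0 when the item is an original tweet.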
type FeedItem struct {
	TweetID
	RetweetID TweetID
}
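
// A Feed is an ordered list of FeedItems plus a TweetTrove holding the tweets,
// retweets and users they reference.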
type Feed struct {
	Items []FeedItem
	TweetTrove
}

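// BottomTimestamp returns the effective timestamp of the oldest item in the
// feed (retweeted_at for retweets, posted_at otherwise), suitable as the
// max_posted_at cursor for fetching the next page. It returns the zero
// Timestamp if the feed is empty.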
func (f Feed) BottomTimestamp() Timestamp {
	if len(f.Items) == 0 {
		return TimestampFromUnix(0)
	}
	last := f.Items[len(f.Items)-1]
	if last.RetweetID != 0 {
		return f.Retweets[last.RetweetID].RetweetedAt
	}
	return f.Tweets[last.TweetID].PostedAt
}

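// NewFeed constructs an empty Feed with an initialized TweetTrove.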
func NewFeed() Feed {
	return Feed{
		Items:      []FeedItem{},
		TweetTrove: NewTweetTrove(),
	}
}

// GetUserFeed returns the most recent tweets and retweets posted by the given
// user, newest first, up to `count` items. If `max_posted_at` is nonzero, only
// items older than it are returned, so it doubles as a pagination cursor.
// It returns ErrEndOfFeed when nothing matches.
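//
// A typical pagination loop might look like this (a sketch only; `profile` and
// `user_id` are assumed to exist in the caller):
//
//	cursor := TimestampFromUnix(0) // zero cursor = start from the newest item
//	for {
//		feed, err := profile.GetUserFeed(user_id, 50, cursor)
//		if errors.Is(err, ErrEndOfFeed) {
//			break
//		} else if err != nil {
//			panic(err)
//		}
//		// ... render or process feed.Items ...
//		cursor = feed.BottomTimestamp()
//	}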
func (p Profile) GetUserFeed(id UserID, count int, max_posted_at Timestamp) (Feed, error) {
	ret := NewFeed()

	tweet_max_clause := ""
	retweet_max_clause := ""
	if max_posted_at.Unix() > 0 {
		tweet_max_clause = " and posted_at < :max_posted_at "
		retweet_max_clause = " and retweeted_at < :max_posted_at "
	}
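
	// The two UNION branches must yield identical column lists: the tweets
	// branch pads with zeroed retweet columns, and each branch exposes its own
	// sort key (posted_at or retweeted_at) under the alias `order_by`.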
	q := `select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
	             mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type,
	             is_expandable,
	             is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
	             0 tweet_id, 0 retweet_id, 0 retweeted_by, 0 retweeted_at,
	             posted_at order_by
	        from tweets
	        left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
	       where user_id = :id` + tweet_max_clause + `

	       union

	      select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
	             mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type,
	             is_expandable,
	             is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at,
	             tweet_id, retweet_id, retweeted_by, retweeted_at,
	             retweeted_at order_by
	        from retweets
	        left join tweets on retweets.tweet_id = tweets.id
	        left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
	       where retweeted_by = :id` + retweet_max_clause + `

	       order by order_by desc
	       limit :limit`

	stmt, err := p.DB.PrepareNamed(q)
	if err != nil {
		panic(err)
	}

	args := map[string]interface{}{
		"id":            id,
		"limit":         count,
		"max_posted_at": max_posted_at,
	}
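
	// Each row scans into an anonymous struct embedding both Tweet and Retweet;
	// rows from the tweets branch of the UNION leave the Retweet fields zeroed.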
	var results []struct {
		Tweet
		Retweet
		OrderBy int `db:"order_by"`
	}
	err = stmt.Select(&results, args)
	if err != nil {
		panic(err)
	}
	if len(results) == 0 {
		return NewFeed(), ErrEndOfFeed
	}

	for _, val := range results {
		ret.Tweets[val.Tweet.ID] = val.Tweet
		if val.Retweet.RetweetID != 0 {
			ret.Retweets[val.Retweet.RetweetID] = val.Retweet
		}
		ret.Items = append(ret.Items, FeedItem{TweetID: val.Tweet.ID, RetweetID: val.Retweet.RetweetID})
	}

	p.fill_content(&ret.TweetTrove)

	return ret, nil
}