diff --git a/pkg/persistence/compound_queries.go b/pkg/persistence/compound_queries.go
index 23339de..09c2e84 100644
--- a/pkg/persistence/compound_queries.go
+++ b/pkg/persistence/compound_queries.go
@@ -1,6 +1,7 @@
 package persistence
 
 import (
+	"database/sql"
 	"errors"
 	"fmt"
 	"strings"
@@ -10,6 +11,7 @@ import (
 
 var (
 	ErrEndOfFeed = errors.New("end of feed")
+	ErrNotInDB   = errors.New("not in database")
 )
 
 func (p Profile) fill_content(trove *TweetTrove) {
@@ -153,6 +155,142 @@ func (p Profile) fill_content(trove *TweetTrove) {
 	}
 }
 
+// TweetDetailView holds a single tweet along with its conversation context: the chain of
+// tweets it is replying to, and the reply threads underneath it.  The tweets themselves are
+// stored in the embedded TweetTrove; the other fields only hold IDs.
+//
+// TODO: compound-query-structs
+type TweetDetailView struct {
+	TweetTrove
+	ParentIDs   []TweetID   // Ancestors of the main tweet, oldest first
+	MainTweetID TweetID     // The tweet that was requested
+	ReplyChains [][]TweetID // Each chain starts with a direct reply to the main tweet
+}
+
+// NewTweetDetailView creates an empty TweetDetailView with its trove and slices initialized.
+func NewTweetDetailView() TweetDetailView {
+	return TweetDetailView{
+		TweetTrove:  NewTweetTrove(),
+		ParentIDs:   []TweetID{},
+		ReplyChains: [][]TweetID{},
+	}
+}
+
+// GetTweetDetail returns the given tweet, all its parent tweets, and a list of conversation threads.
+//
+// Parents are found by following "in_reply_to_id" upward until a tweet has no parent or the parent
+// isn't in the database.  Up to 50 direct replies are loaded (most-liked first), followed by up to
+// 50 replies to those replies.
+//
+// If the main tweet isn't in the database, the returned error wraps ErrNotInDB.  Any other
+// database error causes a panic.
+func (p Profile) GetTweetDetail(id TweetID) (TweetDetailView, error) {
+	// TODO: compound-query-structs
+	ret := NewTweetDetailView()
+
+	stmt, err := p.DB.Preparex(
+		`select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
+		        mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, '') tombstone_type,
+		        is_expandable,
+		        is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
+		   from tweets
+		   left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
+		  where id = ?`)
+	if err != nil {
+		panic(err)
+	}
+	// The statement is reused for every parent lookup below; release it when we're done
+	defer stmt.Close()
+
+	// Main tweet
+	var tweet Tweet
+	err = stmt.Get(&tweet, id)
+	if errors.Is(err, sql.ErrNoRows) {
+		return ret, fmt.Errorf("Tweet ID %d: %w", id, ErrNotInDB)
+	}
+	if err != nil {
+		panic(err)
+	}
+	ret.Tweets[tweet.ID] = tweet
+	ret.MainTweetID = tweet.ID
+
+	// Parent tweets
+	for tweet.InReplyToID != 0 {
+		err = stmt.Get(&tweet, tweet.InReplyToID)
+		if errors.Is(err, sql.ErrNoRows) {
+			// Parent isn't in the DB; the chain of known ancestors ends here
+			break
+		}
+		if err != nil {
+			panic(err)
+		}
+		ret.Tweets[tweet.ID] = tweet
+		// Prepend, so that ParentIDs ends up ordered from oldest ancestor to direct parent
+		ret.ParentIDs = append([]TweetID{tweet.ID}, ret.ParentIDs...)
+	}
+
+	// TODO: tweet-detail-thread-chains
+
+	// Replies lvl 1
+	// TODO: this can be done with a recursive Common Table Expression:
+	// - https://www.sqlite.org/lang_with.html#recursive_query_examples
+	var replies []Tweet
+	err = p.DB.Select(&replies,
+		`select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
+		        mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, '') tombstone_type,
+		        is_expandable,
+		        is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
+		   from tweets
+		   left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
+		  where in_reply_to_id = ?
+		  order by num_likes desc
+		  limit 50`,
+		id)
+	if err != nil {
+		panic(err)
+	}
+	if len(replies) > 0 {
+		reply_1_ids := make([]interface{}, 0, len(replies))
+		for _, r := range replies {
+			ret.Tweets[r.ID] = r
+			reply_1_ids = append(reply_1_ids, r.ID)
+			ret.ReplyChains = append(ret.ReplyChains, []TweetID{r.ID})
+		}
+
+		// Replies lvl 2
+		// Use a separate destination slice for these: depending on the sqlx version, Select may
+		// append to a non-empty destination instead of overwriting it
+		var replies_2 []Tweet
+		reply2_query := `
+		select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
+		       mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, '') tombstone_type,
+		       is_expandable,
+		       is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
+		  from tweets
+		  left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
+		 where in_reply_to_id in (` + strings.Repeat("?,", len(reply_1_ids)-1) + `?)
+		 order by num_likes desc
+		 limit 50`
+		err = p.DB.Select(&replies_2, reply2_query, reply_1_ids...)
+		if err != nil {
+			panic(err)
+		}
+		for _, r := range replies_2 {
+			ret.Tweets[r.ID] = r
+			for i, chain := range ret.ReplyChains {
+				if chain[0] == r.InReplyToID {
+					ret.ReplyChains[i] = append(chain, r.ID)
+					break
+				}
+				// TODO: Log weird situation
+			}
+		}
+	}
+
+	p.fill_content(&ret.TweetTrove)
+	return ret, nil
+}
+
 // TODO: compound-query-structs
 type FeedItem struct {
 	TweetID
diff --git a/pkg/persistence/compound_queries_test.go b/pkg/persistence/compound_queries_test.go
index 9a6c29e..512fbcf 100644
--- a/pkg/persistence/compound_queries_test.go
+++ b/pkg/persistence/compound_queries_test.go
@@ -101,3 +101,94 @@ func TestBuildUserFeedEnd(t *testing.T) {
 	assert.Len(feed.Users, 0)
 	require.Len(feed.Items, 0)
 }
+
+func TestTweetDetailWithReplies(t *testing.T) {
+	require := require.New(t)
+	assert := assert.New(t)
+
+	profile, err := persistence.LoadProfile("../../sample_data/profile")
+	require.NoError(err)
+
+	tweet_detail, err := profile.GetTweetDetail(TweetID(1413646595493568516))
+	require.NoError(err)
+
+	assert.Len(tweet_detail.Retweets, 0)
+
+	assert.Len(tweet_detail.Tweets, 8)
+	for _, id := range []TweetID{
+		1413646309047767042,
+		1413646595493568516,
+		1413647919215906817,
+		1413657324267311104,
+		1413658466795737091,
+		1413650853081276421,
+		1413772782358433792,
+		1413773185296650241,
+	} {
+		_, is_ok := tweet_detail.Tweets[id]
+		assert.True(is_ok)
+	}
+
+	assert.Len(tweet_detail.Users, 4)
+	for _, id := range []UserID{
+		1032468021485293568,
+		1372116552942764034,
+		1067869346775646208,
+		1304281147074064385,
+	} {
+		_, is_ok := tweet_detail.Users[id]
+		assert.True(is_ok)
+	}
+
+	require.Len(tweet_detail.ParentIDs, 1)
+	assert.Equal(tweet_detail.ParentIDs[0], TweetID(1413646309047767042))
+
+	require.Len(tweet_detail.ReplyChains, 4)
+	assert.Len(tweet_detail.ReplyChains[0], 1)
+	assert.Equal(tweet_detail.ReplyChains[0][0], TweetID(1413647919215906817))
+	assert.Len(tweet_detail.ReplyChains[1], 2)
+	assert.Equal(tweet_detail.ReplyChains[1][0], TweetID(1413657324267311104))
+	assert.Equal(tweet_detail.ReplyChains[1][1], TweetID(1413658466795737091))
+	assert.Len(tweet_detail.ReplyChains[2], 1)
+	assert.Equal(tweet_detail.ReplyChains[2][0], TweetID(1413650853081276421))
+	assert.Len(tweet_detail.ReplyChains[3], 2)
+	assert.Equal(tweet_detail.ReplyChains[3][0], TweetID(1413772782358433792))
+	assert.Equal(tweet_detail.ReplyChains[3][1], TweetID(1413773185296650241))
+}
+
+func TestTweetDetailWithParents(t *testing.T) {
+	require := require.New(t)
+	assert := assert.New(t)
+
+	profile, err := persistence.LoadProfile("../../sample_data/profile")
+	require.NoError(err)
+
+	tweet_detail, err := profile.GetTweetDetail(TweetID(1413773185296650241))
+	require.NoError(err)
+
+	assert.Len(tweet_detail.Retweets, 0)
+
+	assert.Len(tweet_detail.Tweets, 4)
+	for _, id := range []TweetID{
+		1413646309047767042,
+		1413646595493568516,
+		1413772782358433792,
+		1413773185296650241,
+	} {
+		_, is_ok := tweet_detail.Tweets[id]
+		assert.True(is_ok)
+	}
+
+	assert.Len(tweet_detail.Users, 2)
+	_, is_ok := tweet_detail.Users[1032468021485293568]
+	assert.True(is_ok)
+	_, is_ok = tweet_detail.Users[1372116552942764034]
+	assert.True(is_ok)
+
+	require.Len(tweet_detail.ParentIDs, 3)
+	assert.Equal(tweet_detail.ParentIDs[0], TweetID(1413646309047767042))
+	assert.Equal(tweet_detail.ParentIDs[1], TweetID(1413646595493568516))
+	assert.Equal(tweet_detail.ParentIDs[2], TweetID(1413772782358433792))
+
+	require.Len(tweet_detail.ReplyChains, 0)
+}
diff --git a/pkg/persistence/tweet_queries.go b/pkg/persistence/tweet_queries.go
index 32c7e3d..ec67f8c 100644
--- a/pkg/persistence/tweet_queries.go
+++ b/pkg/persistence/tweet_queries.go
@@ -8,7 +8,5 @@ import (
 	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
-var ErrNotInDB = errors.New("not in database")
-
 func (p Profile) SaveTweet(t scraper.Tweet) error {
 	db := p.DB