BUGFIX: don't clobber num_likes, num_retweets etc. if tweet is a stub

- Convert Tweet queries to sqlx, including a new CommaSeparatedList type to convert Hashtags, Mentions and ReplyMentions
This commit is contained in:
Alessio 2023-06-03 08:30:49 -03:00
parent dea37c7556
commit 815a8180da
6 changed files with 93 additions and 96 deletions

View File

@@ -4,7 +4,6 @@ import (
"database/sql" "database/sql"
"errors" "errors"
"fmt" "fmt"
"strings"
"offline_twitter/scraper" "offline_twitter/scraper"
) )
@@ -22,42 +21,36 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
} }
} }
_, err := db.Exec(` _, err := db.NamedExec(`
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id,
quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_stub, is_content_downloaded, quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_stub, is_content_downloaded,
is_conversation_scraped, last_scraped_at) is_conversation_scraped, last_scraped_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, nullif(?, ''), (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?) values (:id, :user_id, :text, :posted_at, :num_likes, :num_retweets, :num_replies, :num_quote_tweets, :in_reply_to_id, :quoted_tweet_id, :mentions, :reply_mentions, :hashtags, nullif(:space_id, ''), (select rowid from tombstone_types where short_name=:tombstone_type), :is_stub, :is_content_downloaded, :is_conversation_scraped, :last_scraped_at)
on conflict do update on conflict do update
set text=(case set text=(case
when is_stub then when is_stub then
? :text
else else
text text
end end
), ),
num_likes=?, num_likes=(case when :is_stub then num_likes else :num_likes end),
num_retweets=?, num_retweets=(case when :is_stub then num_retweets else :num_retweets end),
num_replies=?, num_replies=(case when :is_stub then num_replies else :num_replies end),
num_quote_tweets=?, num_quote_tweets=(case when :is_stub then num_quote_tweets else :num_quote_tweets end),
is_stub=(is_stub and ?), is_stub=(is_stub and :is_stub),
tombstone_type=(case tombstone_type=(case
when ?='unavailable' and tombstone_type not in (0, 4) then when :tombstone_type='unavailable' and tombstone_type not in (0, 4) then
tombstone_type tombstone_type
else else
(select rowid from tombstone_types where short_name=?) (select rowid from tombstone_types where short_name=:tombstone_type)
end end
), ),
is_content_downloaded=(is_content_downloaded or ?), is_content_downloaded=(is_content_downloaded or :is_content_downloaded),
is_conversation_scraped=(is_conversation_scraped or ?), is_conversation_scraped=(is_conversation_scraped or :is_conversation_scraped),
last_scraped_at=max(last_scraped_at, ?) last_scraped_at=max(last_scraped_at, :last_scraped_at)
`, `,
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t,
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
t.LastScrapedAt,
t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
) )
if err != nil { if err != nil {
@@ -119,82 +112,53 @@ func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool {
func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
db := p.DB db := p.DB
stmt, err := db.Prepare(` var t scraper.Tweet
err := db.Get(&t, `
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
mentions, reply_mentions, hashtags, ifnull(space_id, ''), ifnull(tombstone_types.short_name, ""), is_stub, mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type, is_stub,
is_content_downloaded, is_conversation_scraped, last_scraped_at is_content_downloaded, is_conversation_scraped, last_scraped_at
from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
where id = ? where id = ?
`) `, id)
if err != nil { if err != nil {
return scraper.Tweet{}, fmt.Errorf("Error preparing statement in GetTweetByID(%d):\n %w", id, err) return scraper.Tweet{}, fmt.Errorf("Error executing GetTweetByID(%d):\n %w", id, err)
}
defer stmt.Close()
var t scraper.Tweet
var mentions string
var reply_mentions string
var hashtags string
row := stmt.QueryRow(id)
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
&t.IsConversationScraped, &t.LastScrapedAt)
if err != nil {
return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n %w", id, err)
}
t.Mentions = []scraper.UserHandle{}
for _, m := range strings.Split(mentions, ",") {
if m != "" {
t.Mentions = append(t.Mentions, scraper.UserHandle(m))
}
}
t.ReplyMentions = []scraper.UserHandle{}
for _, m := range strings.Split(reply_mentions, ",") {
if m != "" {
t.ReplyMentions = append(t.ReplyMentions, scraper.UserHandle(m))
}
}
t.Hashtags = []string{}
for _, h := range strings.Split(hashtags, ",") {
if h != "" {
t.Hashtags = append(t.Hashtags, h)
}
} }
t.Spaces = []scraper.Space{} t.Spaces = []scraper.Space{}
if t.SpaceID != "" { if t.SpaceID != "" {
space, err := p.GetSpaceById(t.SpaceID) space, err := p.GetSpaceById(t.SpaceID)
if err != nil { if err != nil {
return t, err return t, fmt.Errorf("Error retrieving space with ID %s (tweet %d):\n %w", t.SpaceID, t.ID, err)
} }
t.Spaces = append(t.Spaces, space) t.Spaces = append(t.Spaces, space)
} }
imgs, err := p.GetImagesForTweet(t) imgs, err := p.GetImagesForTweet(t)
if err != nil { if err != nil {
return t, err return t, fmt.Errorf("Error retrieving images for tweet %d:\n %w", t.ID, err)
} }
t.Images = imgs t.Images = imgs
vids, err := p.GetVideosForTweet(t) vids, err := p.GetVideosForTweet(t)
if err != nil { if err != nil {
return t, err return t, fmt.Errorf("Error retrieving videos for tweet %d:\n %w", t.ID, err)
} }
t.Videos = vids t.Videos = vids
polls, err := p.GetPollsForTweet(t) polls, err := p.GetPollsForTweet(t)
if err != nil { if err != nil {
return t, err return t, fmt.Errorf("Error retrieving polls for tweet %d:\n %w", t.ID, err)
} }
t.Polls = polls t.Polls = polls
urls, err := p.GetUrlsForTweet(t) urls, err := p.GetUrlsForTweet(t)
if err != nil {
return t, fmt.Errorf("Error retrieving urls for tweet %d:\n %w", t.ID, err)
}
t.Urls = urls t.Urls = urls
return t, err return t, nil
} }
/** /**

View File

@@ -77,6 +77,10 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.IsConversationScraped = true tweet.IsConversationScraped = true
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000) tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
tweet.Text = "Yes text" tweet.Text = "Yes text"
tweet.NumLikes = 10
tweet.NumRetweets = 11
tweet.NumQuoteTweets = 12
tweet.NumReplies = 13
// Save the tweet // Save the tweet
err := profile.SaveTweet(tweet) err := profile.SaveTweet(tweet)
@@ -90,6 +94,10 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.Text = "" tweet.Text = ""
err = profile.SaveTweet(tweet) err = profile.SaveTweet(tweet)
require.NoError(err) require.NoError(err)
tweet.NumLikes = 0
tweet.NumRetweets = 0
tweet.NumQuoteTweets = 0
tweet.NumReplies = 0
// Reload the tweet // Reload the tweet
new_tweet, err := profile.GetTweetById(tweet.ID) new_tweet, err := profile.GetTweetById(tweet.ID)
@@ -100,6 +108,10 @@ func TestNoWorseningTweet(t *testing.T) {
assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status")
assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time")
assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable") assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable")
assert.Equal(10, new_tweet.NumLikes)
assert.Equal(11, new_tweet.NumRetweets)
assert.Equal(12, new_tweet.NumQuoteTweets)
assert.Equal(13, new_tweet.NumReplies)
} }
/** /**

View File

@@ -166,8 +166,8 @@ func create_stable_tweet() scraper.Tweet {
Images: []scraper.Image{ Images: []scraper.Image{
create_image_from_id(-1), create_image_from_id(-1),
}, },
Mentions: []scraper.UserHandle{}, Mentions: scraper.CommaSeparatedList{},
Hashtags: []string{}, Hashtags: scraper.CommaSeparatedList{},
Polls: []scraper.Poll{ Polls: []scraper.Poll{
create_poll_from_id(-1), create_poll_from_id(-1),
}, },
@@ -257,9 +257,9 @@ func create_dummy_tweet() scraper.Tweet {
Videos: []scraper.Video{vid}, Videos: []scraper.Video{vid},
Urls: []scraper.Url{url1, url2}, Urls: []scraper.Url{url1, url2},
Images: []scraper.Image{img1, img2}, Images: []scraper.Image{img1, img2},
Mentions: []scraper.UserHandle{"mention1", "mention2"}, Mentions: scraper.CommaSeparatedList{"mention1", "mention2"},
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"}, ReplyMentions: scraper.CommaSeparatedList{"replymention1", "replymention2"},
Hashtags: []string{"hash1", "hash2"}, Hashtags: scraper.CommaSeparatedList{"hash1", "hash2"},
Polls: []scraper.Poll{poll}, Polls: []scraper.Poll{poll},
Spaces: []scraper.Space{space}, Spaces: []scraper.Space{space},
SpaceID: space_id, SpaceID: space_id,
@@ -278,9 +278,9 @@ func create_dummy_tombstone() scraper.Tweet {
UserID: -1, UserID: -1,
TombstoneType: "deleted", TombstoneType: "deleted",
IsStub: true, IsStub: true,
Mentions: []scraper.UserHandle{}, Mentions: scraper.CommaSeparatedList{},
ReplyMentions: []scraper.UserHandle{}, ReplyMentions: scraper.CommaSeparatedList{},
Hashtags: []string{}, Hashtags: scraper.CommaSeparatedList{},
Spaces: []scraper.Space{}, Spaces: []scraper.Space{},
} }
} }

View File

@@ -125,7 +125,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
assert.Equal(TweetID(1485689207435710464), quoted_tweet.InReplyToID) assert.Equal(TweetID(1485689207435710464), quoted_tweet.InReplyToID)
assert.Equal(TweetID(0), quoted_tweet.QuotedTweetID) assert.Equal(TweetID(0), quoted_tweet.QuotedTweetID)
assert.Equal(1, len(quoted_tweet.ReplyMentions)) assert.Equal(1, len(quoted_tweet.ReplyMentions))
assert.Contains(quoted_tweet.ReplyMentions, UserHandle("michaelmalice")) assert.Contains(quoted_tweet.ReplyMentions, "michaelmalice")
assert.Equal(1, quoted_tweet.NumReplies) assert.Equal(1, quoted_tweet.NumReplies)
assert.Equal(12, quoted_tweet.NumLikes) assert.Equal(12, quoted_tweet.NumLikes)

View File

@@ -1,6 +1,7 @@
package scraper package scraper
import ( import (
"database/sql/driver"
"fmt" "fmt"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"strings" "strings"
@@ -9,44 +10,64 @@ import (
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
) )
const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50
type TweetID int64 type TweetID int64
type CommaSeparatedList []string
func (l *CommaSeparatedList) Scan(src interface{}) error {
*l = CommaSeparatedList{}
switch src.(type) {
case string:
for _, v := range strings.Split(src.(string), ",") {
if v != "" {
*l = append(*l, v)
}
}
default:
panic("Should be a string")
}
return nil
}
func (l CommaSeparatedList) Value() (driver.Value, error) {
return strings.Join(l, ","), nil
}
type Tweet struct { type Tweet struct {
ID TweetID ID TweetID `db:"id"`
UserID UserID UserID UserID `db:"user_id"`
UserHandle UserHandle // For processing tombstones UserHandle UserHandle // For processing tombstones
User *User User *User
Text string Text string `db:"text"`
PostedAt Timestamp IsExpandable bool `db:"is_expandable"`
NumLikes int PostedAt Timestamp `db:"posted_at"`
NumRetweets int NumLikes int `db:"num_likes"`
NumReplies int NumRetweets int `db:"num_retweets"`
NumQuoteTweets int NumReplies int `db:"num_replies"`
InReplyToID TweetID NumQuoteTweets int `db:"num_quote_tweets"`
QuotedTweetID TweetID InReplyToID TweetID `db:"in_reply_to_id"`
QuotedTweetID TweetID `db:"quoted_tweet_id"`
Images []Image Images []Image
Videos []Video Videos []Video
Mentions []UserHandle
ReplyMentions []UserHandle
Hashtags []string
Urls []Url Urls []Url
Polls []Poll Polls []Poll
Mentions CommaSeparatedList `db:"mentions"`
ReplyMentions CommaSeparatedList `db:"reply_mentions"`
Hashtags CommaSeparatedList `db:"hashtags"`
// TODO get-rid-of-spaces: Might be good to get rid of `Spaces`. Only used in APIv1 I think. // TODO get-rid-of-spaces: Might be good to get rid of `Spaces`. Only used in APIv1 I think.
// A first-step would be to delete the Spaces after pulling them out of a Tweet into the Trove // A first-step would be to delete the Spaces after pulling them out of a Tweet into the Trove
// in ParseTweetResponse. Then they will only be getting saved once rather than twice. // in ParseTweetResponse. Then they will only be getting saved once rather than twice.
Spaces []Space Spaces []Space
SpaceID SpaceID SpaceID SpaceID `db:"space_id"`
TombstoneType string TombstoneType string `db:"tombstone_type"`
IsStub bool IsStub bool `db:"is_stub"`
IsContentDownloaded bool IsContentDownloaded bool `db:"is_content_downloaded"`
IsConversationScraped bool IsConversationScraped bool `db:"is_conversation_scraped"`
LastScrapedAt Timestamp LastScrapedAt Timestamp `db:"last_scraped_at"`
} }
func (t Tweet) String() string { func (t Tweet) String() string {
@@ -150,14 +171,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process `@` mentions and reply-mentions // Process `@` mentions and reply-mentions
for _, mention := range apiTweet.Entities.Mentions { for _, mention := range apiTweet.Entities.Mentions {
ret.Mentions = append(ret.Mentions, UserHandle(mention.UserName)) ret.Mentions = append(ret.Mentions, mention.UserName)
} }
for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") { for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
if mention != "" { if mention != "" {
if mention[0] != '@' { if mention[0] != '@' {
panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", apiTweet.Entities.ReplyMentions, EXTERNAL_API_ERROR)) panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", apiTweet.Entities.ReplyMentions, EXTERNAL_API_ERROR))
} }
ret.ReplyMentions = append(ret.ReplyMentions, UserHandle(mention[1:])) ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
} }
} }

View File

@@ -35,7 +35,7 @@ func TestParseSingleTweet(t *testing.T) {
assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the "+ assert.Equal("The fact that @michaelmalice new book The Anarchist Handbook is just absolutely destroying on the charts is the "+
"largest white pill Ive swallowed in years.", tweet.Text) "largest white pill Ive swallowed in years.", tweet.Text)
assert.Len(tweet.Mentions, 1) assert.Len(tweet.Mentions, 1)
assert.Contains(tweet.Mentions, UserHandle("michaelmalice")) assert.Contains(tweet.Mentions, "michaelmalice")
assert.Empty(tweet.Urls) assert.Empty(tweet.Urls)
assert.Equal(int64(1621639105), tweet.PostedAt.Unix()) assert.Equal(int64(1621639105), tweet.PostedAt.Unix())
assert.Zero(tweet.QuotedTweetID) assert.Zero(tweet.QuotedTweetID)
@@ -179,7 +179,7 @@ func TestTweetWithLotsOfReplyMentions(t *testing.T) {
tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json") tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")
assert.Len(tweet.ReplyMentions, 4) assert.Len(tweet.ReplyMentions, 4)
for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} { for i, v := range []string{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
assert.Equal(v, tweet.ReplyMentions[i]) assert.Equal(v, tweet.ReplyMentions[i])
} }
} }