BUGFIX: don't clobber num_likes, num_retweets, etc. when the tweet being saved is a stub

- Convert Tweet queries to sqlx, including a new CommaSeparatedList type that converts Hashtags, Mentions and ReplyMentions to and from their comma-separated database form
2023-06-03 08:30:49 -03:00 · 2023-06-03 08:30:49 -03:00 · 815a8180da
commit 815a8180da
parent dea37c7556
6 changed files with 93 additions and 96 deletions
--- a/persistence/tweet_queries.go
+++ b/persistence/tweet_queries.go
@ -4,7 +4,6 @@ import (
 	"database/sql"
 	"errors"
 	"fmt"
-	"strings"

 	"offline_twitter/scraper"
 )
@ -22,42 +21,36 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 		}
 	}

-	_, err := db.Exec(`
+	_, err := db.NamedExec(`
        insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id,
                            quoted_tweet_id, mentions, reply_mentions, hashtags, space_id, tombstone_type, is_stub, is_content_downloaded,
                            is_conversation_scraped, last_scraped_at)
-        values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, nullif(?, ''), (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
+        values (:id, :user_id, :text, :posted_at, :num_likes, :num_retweets, :num_replies, :num_quote_tweets, :in_reply_to_id, :quoted_tweet_id, :mentions, :reply_mentions, :hashtags, nullif(:space_id, ''), (select rowid from tombstone_types where short_name=:tombstone_type), :is_stub, :is_content_downloaded, :is_conversation_scraped, :last_scraped_at)
            on conflict do update
           set text=(case
                     when is_stub then
-                         ?
+                         :text
                     else
                         text
                     end
               ),
-               num_likes=?,
-               num_retweets=?,
-               num_replies=?,
-               num_quote_tweets=?,
-               is_stub=(is_stub and ?),
+               num_likes=(case when :is_stub then num_likes else :num_likes end),
+               num_retweets=(case when :is_stub then num_retweets else :num_retweets end),
+               num_replies=(case when :is_stub then num_replies else :num_replies end),
+               num_quote_tweets=(case when :is_stub then num_quote_tweets else :num_quote_tweets end),
+               is_stub=(is_stub and :is_stub),
               tombstone_type=(case
-                               when ?='unavailable' and tombstone_type not in (0, 4) then
+                               when :tombstone_type='unavailable' and tombstone_type not in (0, 4) then
                                   tombstone_type
                               else
-                                   (select rowid from tombstone_types where short_name=?)
+                                   (select rowid from tombstone_types where short_name=:tombstone_type)
                               end
               ),
-               is_content_downloaded=(is_content_downloaded or ?),
-               is_conversation_scraped=(is_conversation_scraped or ?),
-               last_scraped_at=max(last_scraped_at, ?)
+               is_content_downloaded=(is_content_downloaded or :is_content_downloaded),
+               is_conversation_scraped=(is_conversation_scraped or :is_conversation_scraped),
+               last_scraped_at=max(last_scraped_at, :last_scraped_at)
        `,
-		t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
-		t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
-		strings.Join(t.Hashtags, ","), t.SpaceID, t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
-		t.LastScrapedAt,
-
-		t.Text, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.TombstoneType, t.TombstoneType,
-		t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
+		t,
 	)

 	if err != nil {
@ -119,82 +112,53 @@ func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool {
 func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	db := p.DB

-	stmt, err := db.Prepare(`
+	var t scraper.Tweet
+	err := db.Get(&t, `
        select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id,
-               mentions, reply_mentions, hashtags, ifnull(space_id, ''), ifnull(tombstone_types.short_name, ""), is_stub,
+               mentions, reply_mentions, hashtags, ifnull(space_id, '') space_id, ifnull(tombstone_types.short_name, "") tombstone_type, is_stub,
               is_content_downloaded, is_conversation_scraped, last_scraped_at
          from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
         where id = ?
-    `)
+    `, id)

 	if err != nil {
-		return scraper.Tweet{}, fmt.Errorf("Error preparing statement in GetTweetByID(%d):\n  %w", id, err)
-	}
-	defer stmt.Close()
-
-	var t scraper.Tweet
-	var mentions string
-	var reply_mentions string
-	var hashtags string
-
-	row := stmt.QueryRow(id)
-	err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
-		&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.SpaceID, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
-		&t.IsConversationScraped, &t.LastScrapedAt)
-	if err != nil {
-		return t, fmt.Errorf("Error parsing result in GetTweetByID(%d):\n  %w", id, err)
-	}
-
-	t.Mentions = []scraper.UserHandle{}
-	for _, m := range strings.Split(mentions, ",") {
-		if m != "" {
-			t.Mentions = append(t.Mentions, scraper.UserHandle(m))
-		}
-	}
-	t.ReplyMentions = []scraper.UserHandle{}
-	for _, m := range strings.Split(reply_mentions, ",") {
-		if m != "" {
-			t.ReplyMentions = append(t.ReplyMentions, scraper.UserHandle(m))
-		}
-	}
-	t.Hashtags = []string{}
-	for _, h := range strings.Split(hashtags, ",") {
-		if h != "" {
-			t.Hashtags = append(t.Hashtags, h)
-		}
+		return scraper.Tweet{}, fmt.Errorf("Error executing GetTweetByID(%d):\n  %w", id, err)
 	}

 	t.Spaces = []scraper.Space{}
 	if t.SpaceID != "" {
 		space, err := p.GetSpaceById(t.SpaceID)
 		if err != nil {
-			return t, err
+			return t, fmt.Errorf("Error retrieving space with ID %s (tweet %d):\n  %w", t.SpaceID, t.ID, err)
 		}
 		t.Spaces = append(t.Spaces, space)
 	}

 	imgs, err := p.GetImagesForTweet(t)
 	if err != nil {
-		return t, err
+		return t, fmt.Errorf("Error retrieving images for tweet %d:\n  %w", t.ID, err)
 	}
 	t.Images = imgs

 	vids, err := p.GetVideosForTweet(t)
 	if err != nil {
-		return t, err
+		return t, fmt.Errorf("Error retrieving videos for tweet %d:\n  %w", t.ID, err)
 	}
 	t.Videos = vids

 	polls, err := p.GetPollsForTweet(t)
 	if err != nil {
-		return t, err
+		return t, fmt.Errorf("Error retrieving polls for tweet %d:\n  %w", t.ID, err)
 	}
 	t.Polls = polls

 	urls, err := p.GetUrlsForTweet(t)
+	if err != nil {
+		return t, fmt.Errorf("Error retrieving urls for tweet %d:\n  %w", t.ID, err)
+	}
 	t.Urls = urls

-	return t, err
+	return t, nil
 }

 /**
--- a/persistence/tweet_queries_test.go
+++ b/persistence/tweet_queries_test.go
@ -77,6 +77,10 @@ func TestNoWorseningTweet(t *testing.T) {
 	tweet.IsConversationScraped = true
 	tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
 	tweet.Text = "Yes text"
+	tweet.NumLikes = 10
+	tweet.NumRetweets = 11
+	tweet.NumQuoteTweets = 12
+	tweet.NumReplies = 13

 	// Save the tweet
 	err := profile.SaveTweet(tweet)
@ -90,6 +94,10 @@ func TestNoWorseningTweet(t *testing.T) {
 	tweet.Text = ""
 	err = profile.SaveTweet(tweet)
 	require.NoError(err)
+	tweet.NumLikes = 0
+	tweet.NumRetweets = 0
+	tweet.NumQuoteTweets = 0
+	tweet.NumReplies = 0

 	// Reload the tweet
 	new_tweet, err := profile.GetTweetById(tweet.ID)
@ -100,6 +108,10 @@ func TestNoWorseningTweet(t *testing.T) {
 	assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status")
 	assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time")
 	assert.Equal(new_tweet.Text, "Yes text", "Text should not get clobbered if it becomes unavailable")
+	assert.Equal(10, new_tweet.NumLikes)
+	assert.Equal(11, new_tweet.NumRetweets)
+	assert.Equal(12, new_tweet.NumQuoteTweets)
+	assert.Equal(13, new_tweet.NumReplies)
 }

 /**
--- a/persistence/utils_test.go
+++ b/persistence/utils_test.go
@ -166,8 +166,8 @@ func create_stable_tweet() scraper.Tweet {
 		Images: []scraper.Image{
 			create_image_from_id(-1),
 		},
-		Mentions: []scraper.UserHandle{},
-		Hashtags: []string{},
+		Mentions: scraper.CommaSeparatedList{},
+		Hashtags: scraper.CommaSeparatedList{},
 		Polls: []scraper.Poll{
 			create_poll_from_id(-1),
 		},
@ -257,9 +257,9 @@ func create_dummy_tweet() scraper.Tweet {
 		Videos:         []scraper.Video{vid},
 		Urls:           []scraper.Url{url1, url2},
 		Images:         []scraper.Image{img1, img2},
-		Mentions:       []scraper.UserHandle{"mention1", "mention2"},
-		ReplyMentions:  []scraper.UserHandle{"replymention1", "replymention2"},
-		Hashtags:       []string{"hash1", "hash2"},
+		Mentions:       scraper.CommaSeparatedList{"mention1", "mention2"},
+		ReplyMentions:  scraper.CommaSeparatedList{"replymention1", "replymention2"},
+		Hashtags:       scraper.CommaSeparatedList{"hash1", "hash2"},
 		Polls:          []scraper.Poll{poll},
 		Spaces:         []scraper.Space{space},
 		SpaceID:        space_id,
@ -278,9 +278,9 @@ func create_dummy_tombstone() scraper.Tweet {
 		UserID:        -1,
 		TombstoneType: "deleted",
 		IsStub:        true,
-		Mentions:      []scraper.UserHandle{},
-		ReplyMentions: []scraper.UserHandle{},
-		Hashtags:      []string{},
+		Mentions:      scraper.CommaSeparatedList{},
+		ReplyMentions: scraper.CommaSeparatedList{},
+		Hashtags:      scraper.CommaSeparatedList{},
 		Spaces:        []scraper.Space{},
 	}
 }
--- a/scraper/api_types_v2_test.go
+++ b/scraper/api_types_v2_test.go
@ -125,7 +125,7 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
 	assert.Equal(TweetID(1485689207435710464), quoted_tweet.InReplyToID)
 	assert.Equal(TweetID(0), quoted_tweet.QuotedTweetID)
 	assert.Equal(1, len(quoted_tweet.ReplyMentions))
-	assert.Contains(quoted_tweet.ReplyMentions, UserHandle("michaelmalice"))
+	assert.Contains(quoted_tweet.ReplyMentions, "michaelmalice")
 	assert.Equal(1, quoted_tweet.NumReplies)
 	assert.Equal(12, quoted_tweet.NumLikes)

--- a/scraper/tweet.go
+++ b/scraper/tweet.go
@ -1,6 +1,7 @@
 package scraper

 import (
+	"database/sql/driver"
 	"fmt"
 	log "github.com/sirupsen/logrus"
 	"strings"
@ -9,44 +10,64 @@ import (
 	"offline_twitter/terminal_utils"
 )

-const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50

 type TweetID int64

+type CommaSeparatedList []string
+
+func (l *CommaSeparatedList) Scan(src interface{}) error {
+	*l = CommaSeparatedList{}
+	switch src.(type) {
+	case string:
+		for _, v := range strings.Split(src.(string), ",") {
+			if v != "" {
+				*l = append(*l, v)
+			}
+		}
+	default:
+		panic("Should be a string")
+	}
+	return nil
+}
+func (l CommaSeparatedList) Value() (driver.Value, error) {
+	return strings.Join(l, ","), nil
+}
+
 type Tweet struct {
-	ID             TweetID
-	UserID         UserID
+	ID             TweetID    `db:"id"`
+	UserID         UserID     `db:"user_id"`
 	UserHandle     UserHandle // For processing tombstones
 	User           *User
-	Text           string
-	PostedAt       Timestamp
-	NumLikes       int
-	NumRetweets    int
-	NumReplies     int
-	NumQuoteTweets int
-	InReplyToID    TweetID
-	QuotedTweetID  TweetID
+	Text           string    `db:"text"`
+	IsExpandable   bool      `db:"is_expandable"`
+	PostedAt       Timestamp `db:"posted_at"`
+	NumLikes       int       `db:"num_likes"`
+	NumRetweets    int       `db:"num_retweets"`
+	NumReplies     int       `db:"num_replies"`
+	NumQuoteTweets int       `db:"num_quote_tweets"`
+	InReplyToID    TweetID   `db:"in_reply_to_id"`
+	QuotedTweetID  TweetID   `db:"quoted_tweet_id"`

 	Images        []Image
 	Videos        []Video
-	Mentions      []UserHandle
-	ReplyMentions []UserHandle
-	Hashtags      []string
 	Urls          []Url
 	Polls         []Poll
+	Mentions      CommaSeparatedList `db:"mentions"`
+	ReplyMentions CommaSeparatedList `db:"reply_mentions"`
+	Hashtags      CommaSeparatedList `db:"hashtags"`

 	// TODO get-rid-of-spaces: Might be good to get rid of `Spaces`.  Only used in APIv1 I think.
 	// A first-step would be to delete the Spaces after pulling them out of a Tweet into the Trove
 	// in ParseTweetResponse.  Then they will only be getting saved once rather than twice.
 	Spaces  []Space
-	SpaceID SpaceID
+	SpaceID SpaceID `db:"space_id"`

-	TombstoneType string
-	IsStub        bool
+	TombstoneType string `db:"tombstone_type"`
+	IsStub        bool   `db:"is_stub"`

-	IsContentDownloaded   bool
-	IsConversationScraped bool
-	LastScrapedAt         Timestamp
+	IsContentDownloaded   bool      `db:"is_content_downloaded"`
+	IsConversationScraped bool      `db:"is_conversation_scraped"`
+	LastScrapedAt         Timestamp `db:"last_scraped_at"`
 }

 func (t Tweet) String() string {
@ -150,14 +171,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {

 	// Process `@` mentions and reply-mentions
 	for _, mention := range apiTweet.Entities.Mentions {
-		ret.Mentions = append(ret.Mentions, UserHandle(mention.UserName))
+		ret.Mentions = append(ret.Mentions, mention.UserName)
 	}
 	for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
 		if mention != "" {
 			if mention[0] != '@' {
 				panic(fmt.Errorf("Unknown ReplyMention value %q:\n  %w", apiTweet.Entities.ReplyMentions, EXTERNAL_API_ERROR))
 			}
-			ret.ReplyMentions = append(ret.ReplyMentions, UserHandle(mention[1:]))
+			ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
 		}
 	}

--- a/scraper/tweet_test.go
+++ b/scraper/tweet_test.go
@ -35,7 +35,7 @@ func TestParseSingleTweet(t *testing.T) {
 	assert.Equal("The fact that @michaelmalice new book ‘The Anarchist Handbook’ is just absolutely destroying on the charts is the "+
 		"largest white pill I’ve swallowed in years.", tweet.Text)
 	assert.Len(tweet.Mentions, 1)
-	assert.Contains(tweet.Mentions, UserHandle("michaelmalice"))
+	assert.Contains(tweet.Mentions, "michaelmalice")
 	assert.Empty(tweet.Urls)
 	assert.Equal(int64(1621639105), tweet.PostedAt.Unix())
 	assert.Zero(tweet.QuotedTweetID)
@ -179,7 +179,7 @@ func TestTweetWithLotsOfReplyMentions(t *testing.T) {
 	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_at_mentions_in_front.json")
 	assert.Len(tweet.ReplyMentions, 4)

-	for i, v := range []UserHandle{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
+	for i, v := range []string{"rob_mose", "primalpoly", "jmasseypoet", "SpaceX"} {
 		assert.Equal(v, tweet.ReplyMentions[i])
 	}
 }