Add Tweet#last_scraped_at
and Tweet#is_conversation_scraped
fields
This commit is contained in:
parent
c95ed6c918
commit
c1bcd54a11
@ -49,6 +49,8 @@ create table tweets (rowid integer primary key,
|
||||
is_stub boolean default 0,
|
||||
|
||||
is_content_downloaded boolean default 0,
|
||||
is_conversation_scraped boolean default 0,
|
||||
last_scraped_at integer not null default 0,
|
||||
foreign key(user_id) references users(id)
|
||||
);
|
||||
|
||||
|
@ -16,18 +16,20 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
|
||||
return err
|
||||
}
|
||||
_, err = db.Exec(`
|
||||
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, tombstone_type, is_stub, is_content_downloaded)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?)
|
||||
insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, tombstone_type, is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
|
||||
on conflict do update
|
||||
set num_likes=?,
|
||||
num_retweets=?,
|
||||
num_replies=?,
|
||||
num_quote_tweets=?,
|
||||
is_stub=(is_stub and ?),
|
||||
is_content_downloaded=(is_content_downloaded or ?)
|
||||
is_content_downloaded=(is_content_downloaded or ?),
|
||||
is_conversation_scraped=(is_conversation_scraped or ?),
|
||||
last_scraped_at=max(last_scraped_at, ?)
|
||||
`,
|
||||
t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded,
|
||||
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded,
|
||||
t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
|
||||
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
@ -90,7 +92,7 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
||||
db := p.DB
|
||||
|
||||
stmt, err := db.Prepare(`
|
||||
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(tombstone_types.short_name, ""), is_stub, is_content_downloaded
|
||||
select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(tombstone_types.short_name, ""), is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
|
||||
from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
|
||||
where id = ?
|
||||
`)
|
||||
@ -102,17 +104,20 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
|
||||
|
||||
var t scraper.Tweet
|
||||
var postedAt int
|
||||
var last_scraped_at int
|
||||
var mentions string
|
||||
var reply_mentions string
|
||||
var hashtags string
|
||||
|
||||
row := stmt.QueryRow(id)
|
||||
err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded)
|
||||
err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, &t.IsConversationScraped, &last_scraped_at)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
|
||||
t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
|
||||
t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)
|
||||
|
||||
t.Mentions = []scraper.UserHandle{}
|
||||
for _, m := range strings.Split(mentions, ",") {
|
||||
if m != "" {
|
||||
|
@ -2,6 +2,7 @@ package persistence_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-test/deep"
|
||||
)
|
||||
@ -75,6 +76,8 @@ func TestNoWorseningTweet(t *testing.T) {
|
||||
tweet := create_dummy_tweet()
|
||||
tweet.IsContentDownloaded = true
|
||||
tweet.IsStub = false
|
||||
tweet.IsConversationScraped = true
|
||||
tweet.LastScrapedAt = time.Unix(1000, 0)
|
||||
|
||||
// Save the tweet
|
||||
err := profile.SaveTweet(tweet)
|
||||
@ -85,6 +88,8 @@ func TestNoWorseningTweet(t *testing.T) {
|
||||
// Worsen the tweet and re-save it
|
||||
tweet.IsContentDownloaded = false
|
||||
tweet.IsStub = true
|
||||
tweet.IsConversationScraped = false
|
||||
tweet.LastScrapedAt = time.Unix(500, 0)
|
||||
err = profile.SaveTweet(tweet)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to save the tweet: %s", err.Error())
|
||||
@ -102,6 +107,77 @@ func TestNoWorseningTweet(t *testing.T) {
|
||||
if new_tweet.IsContentDownloaded != true {
|
||||
t.Errorf("Should have preserved is-content-downloaded status")
|
||||
}
|
||||
if new_tweet.IsConversationScraped == false {
|
||||
t.Errorf("Should have preserved is-conversation-scraped status")
|
||||
}
|
||||
if new_tweet.LastScrapedAt.Unix() != 1000 {
|
||||
t.Errorf("Should have preserved last-scraped-at time")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModifyTweet(t *testing.T) {
|
||||
profile_path := "test_profiles/TestTweetQueries"
|
||||
profile := create_or_load_profile(profile_path)
|
||||
|
||||
tweet := create_dummy_tweet()
|
||||
tweet.NumLikes = 1000
|
||||
tweet.NumRetweets = 2000
|
||||
tweet.NumReplies = 3000
|
||||
tweet.NumQuoteTweets = 4000
|
||||
tweet.IsStub = true
|
||||
tweet.IsContentDownloaded = false
|
||||
tweet.IsConversationScraped = false
|
||||
tweet.LastScrapedAt = time.Unix(1000, 0)
|
||||
|
||||
err := profile.SaveTweet(tweet)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to save the tweet: %s", err.Error())
|
||||
}
|
||||
|
||||
tweet.NumLikes = 1500
|
||||
tweet.NumRetweets = 2500
|
||||
tweet.NumReplies = 3500
|
||||
tweet.NumQuoteTweets = 4500
|
||||
tweet.IsStub = false
|
||||
tweet.IsContentDownloaded = true
|
||||
tweet.IsConversationScraped = true
|
||||
tweet.LastScrapedAt = time.Unix(2000, 0)
|
||||
|
||||
err = profile.SaveTweet(tweet)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to re-save the tweet: %s", err.Error())
|
||||
}
|
||||
|
||||
// Reload the tweet
|
||||
new_tweet, err := profile.GetTweetById(tweet.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to load the tweet: %s", err.Error())
|
||||
}
|
||||
|
||||
if new_tweet.NumLikes != 1500 {
|
||||
t.Errorf("Expected %d likes, got %d", 1500, new_tweet.NumLikes)
|
||||
}
|
||||
if new_tweet.NumRetweets != 2500 {
|
||||
t.Errorf("Expected %d retweets, got %d", 2500, new_tweet.NumRetweets)
|
||||
}
|
||||
if new_tweet.NumReplies != 3500 {
|
||||
t.Errorf("Expected %d replies, got %d", 1500, new_tweet.NumReplies)
|
||||
}
|
||||
if new_tweet.NumQuoteTweets != 4500 {
|
||||
t.Errorf("Expected %d quote tweets, got %d", 4500, new_tweet.NumQuoteTweets)
|
||||
}
|
||||
if new_tweet.IsStub != false {
|
||||
t.Errorf("Expected tweet to not be a stub, but it was")
|
||||
}
|
||||
if new_tweet.IsContentDownloaded != true {
|
||||
t.Errorf("Expected tweet content to be downloaded, but it wasn't")
|
||||
}
|
||||
if new_tweet.IsConversationScraped != true {
|
||||
t.Errorf("Expected conversation to be scraped, but it wasn't")
|
||||
}
|
||||
if new_tweet.LastScrapedAt.Unix() != 2000 {
|
||||
t.Errorf("Expected tweet to be scraped at %d (unix timestamp), but got %d", 2000, new_tweet.LastScrapedAt.Unix())
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -168,6 +168,8 @@ func create_stable_tweet() scraper.Tweet {
|
||||
Polls: []scraper.Poll{
|
||||
create_poll_from_id(-1),
|
||||
},
|
||||
IsConversationScraped: true,
|
||||
LastScrapedAt: time.Unix(100000000, 0),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
|
||||
const ENGINE_DATABASE_VERSION = 1
|
||||
const ENGINE_DATABASE_VERSION = 2
|
||||
|
||||
|
||||
type VersionMismatchError struct {
|
||||
@ -49,6 +49,8 @@ var MIGRATIONS = []string{
|
||||
|
||||
foreign key(tweet_id) references tweets(id)
|
||||
);`,
|
||||
`alter table tweets add column is_conversation_scraped boolean default 0;
|
||||
alter table tweets add column last_scraped_at integer not null default 0`,
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -37,6 +37,8 @@ type Tweet struct {
|
||||
IsStub bool
|
||||
|
||||
IsContentDownloaded bool
|
||||
IsConversationScraped bool
|
||||
LastScrapedAt time.Time
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user