Add Tweet#last_scraped_at and Tweet#is_conversation_scraped fields
commit c1bcd54a11
parent c95ed6c918
@@ -49,6 +49,8 @@ create table tweets (rowid integer primary key,
     is_stub boolean default 0,

     is_content_downloaded boolean default 0,
+    is_conversation_scraped boolean default 0,
+    last_scraped_at integer not null default 0,
     foreign key(user_id) references users(id)
 );

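Both new columns default to 0, so pre-existing rows and freshly inserted stubs start out as "never scraped". A natural consumer is a query for tweets whose conversation still needs (re)fetching; the sketch below is illustrative only — the function name and cutoff parameter are not part of this commit — and uses only database/sql:

// import "database/sql"
//
// staleTweetIDs is a hypothetical helper: it returns ids of tweets whose
// conversation has never been scraped, or was last scraped before `cutoff`
// (Unix seconds).
func staleTweetIDs(db *sql.DB, cutoff int64) ([]int64, error) {
	rows, err := db.Query(
		`select id from tweets
		  where is_conversation_scraped = 0
		     or last_scraped_at < ?`, cutoff)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var ids []int64
	for rows.Next() {
		var id int64
		if err := rows.Scan(&id); err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	return ids, rows.Err()
}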
@@ -16,18 +16,20 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 		return err
 	}
 	_, err = db.Exec(`
-		insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, tombstone_type, is_stub, is_content_downloaded)
-		values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?)
+		insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, tombstone_type, is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at)
+		values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, (select rowid from tombstone_types where short_name=?), ?, ?, ?, ?)
 		on conflict do update
 		set num_likes=?,
 		    num_retweets=?,
 		    num_replies=?,
 		    num_quote_tweets=?,
 		    is_stub=(is_stub and ?),
-		    is_content_downloaded=(is_content_downloaded or ?)
+		    is_content_downloaded=(is_content_downloaded or ?),
+		    is_conversation_scraped=(is_conversation_scraped or ?),
+		    last_scraped_at=max(last_scraped_at, ?)
 		`,
-		t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded,
-		t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded,
+		t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
+		t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
 	)

 	if err != nil {

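The upsert is deliberately monotonic: `and`/`or` make the boolean flags ratchet only toward their better value, and max() keeps the newest scrape time, so re-saving a tweet from a stub or an older scrape can never erase progress. Below is a minimal, self-contained sketch of that behavior — assuming a SQLite driver such as mattn/go-sqlite3 and SQLite >= 3.35 (required for a targetless `on conflict do update`); the toy table carries only the two new columns:

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumed driver, not mandated by the commit
)

func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Toy table with just the two new columns.
	if _, err = db.Exec(`create table tweets (id integer primary key,
		is_conversation_scraped boolean default 0,
		last_scraped_at integer not null default 0)`); err != nil {
		log.Fatal(err)
	}

	upsert := `insert into tweets (id, is_conversation_scraped, last_scraped_at)
		values (?, ?, ?)
		on conflict do update
		set is_conversation_scraped=(is_conversation_scraped or ?),
		    last_scraped_at=max(last_scraped_at, ?)`

	// First save: the conversation was scraped at t=1000.
	if _, err = db.Exec(upsert, 1, true, 1000, true, 1000); err != nil {
		log.Fatal(err)
	}
	// Second save carries "worse" data (unscraped, older timestamp):
	// `or` and max() keep the better values instead of overwriting them.
	if _, err = db.Exec(upsert, 1, false, 500, false, 500); err != nil {
		log.Fatal(err)
	}

	var scraped bool
	var at int64
	if err = db.QueryRow(`select is_conversation_scraped, last_scraped_at
		from tweets where id = 1`).Scan(&scraped, &at); err != nil {
		log.Fatal(err)
	}
	fmt.Println(scraped, at) // prints: true 1000
}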
@@ -90,7 +92,7 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	db := p.DB

 	stmt, err := db.Prepare(`
-		select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(tombstone_types.short_name, ""), is_stub, is_content_downloaded
+		select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(tombstone_types.short_name, ""), is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at
 		from tweets left join tombstone_types on tweets.tombstone_type = tombstone_types.rowid
 		where id = ?
 	`)

@@ -102,17 +104,20 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {

 	var t scraper.Tweet
 	var postedAt int
+	var last_scraped_at int
 	var mentions string
 	var reply_mentions string
 	var hashtags string

 	row := stmt.QueryRow(id)
-	err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded)
+	err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, &t.IsConversationScraped, &last_scraped_at)
 	if err != nil {
 		return t, err
 	}

 	t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
+	t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)

 	t.Mentions = []scraper.UserHandle{}
 	for _, m := range strings.Split(mentions, ",") {
 		if m != "" {

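As with posted_at, the new timestamp is persisted as whole Unix seconds and rebuilt with time.Unix on read. Sub-second precision is dropped in the round trip, which is why the tests below compare .Unix() values rather than time.Time structs directly. A quick sketch of the round trip (plain Go, nothing project-specific; imports "fmt" and "time"):

lastScrapedAt := time.Now()
secs := lastScrapedAt.Unix()         // write side: seconds into the integer column
restored := time.Unix(secs, 0)       // read side: seconds and 0 nanoseconds
fmt.Println(restored.Unix() == secs) // true; nanoseconds are lost, seconds survive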
@@ -2,6 +2,7 @@ package persistence_test

 import (
 	"testing"
+	"time"

 	"github.com/go-test/deep"
 )

@@ -75,6 +76,8 @@ func TestNoWorseningTweet(t *testing.T) {
 	tweet := create_dummy_tweet()
 	tweet.IsContentDownloaded = true
 	tweet.IsStub = false
+	tweet.IsConversationScraped = true
+	tweet.LastScrapedAt = time.Unix(1000, 0)

 	// Save the tweet
 	err := profile.SaveTweet(tweet)

@@ -85,6 +88,8 @@ func TestNoWorseningTweet(t *testing.T) {
 	// Worsen the tweet and re-save it
 	tweet.IsContentDownloaded = false
 	tweet.IsStub = true
+	tweet.IsConversationScraped = false
+	tweet.LastScrapedAt = time.Unix(500, 0)
 	err = profile.SaveTweet(tweet)
 	if err != nil {
 		t.Fatalf("Failed to save the tweet: %s", err.Error())

@@ -102,6 +107,77 @@ func TestNoWorseningTweet(t *testing.T) {
 	if new_tweet.IsContentDownloaded != true {
 		t.Errorf("Should have preserved is-content-downloaded status")
 	}
+	if new_tweet.IsConversationScraped == false {
+		t.Errorf("Should have preserved is-conversation-scraped status")
+	}
+	if new_tweet.LastScrapedAt.Unix() != 1000 {
+		t.Errorf("Should have preserved last-scraped-at time")
+	}
+}
+
+func TestModifyTweet(t *testing.T) {
+	profile_path := "test_profiles/TestTweetQueries"
+	profile := create_or_load_profile(profile_path)
+
+	tweet := create_dummy_tweet()
+	tweet.NumLikes = 1000
+	tweet.NumRetweets = 2000
+	tweet.NumReplies = 3000
+	tweet.NumQuoteTweets = 4000
+	tweet.IsStub = true
+	tweet.IsContentDownloaded = false
+	tweet.IsConversationScraped = false
+	tweet.LastScrapedAt = time.Unix(1000, 0)
+
+	err := profile.SaveTweet(tweet)
+	if err != nil {
+		t.Fatalf("Failed to save the tweet: %s", err.Error())
+	}
+
+	tweet.NumLikes = 1500
+	tweet.NumRetweets = 2500
+	tweet.NumReplies = 3500
+	tweet.NumQuoteTweets = 4500
+	tweet.IsStub = false
+	tweet.IsContentDownloaded = true
+	tweet.IsConversationScraped = true
+	tweet.LastScrapedAt = time.Unix(2000, 0)
+
+	err = profile.SaveTweet(tweet)
+	if err != nil {
+		t.Fatalf("Failed to re-save the tweet: %s", err.Error())
+	}
+
+	// Reload the tweet
+	new_tweet, err := profile.GetTweetById(tweet.ID)
+	if err != nil {
+		t.Fatalf("Failed to load the tweet: %s", err.Error())
+	}
+
+	if new_tweet.NumLikes != 1500 {
+		t.Errorf("Expected %d likes, got %d", 1500, new_tweet.NumLikes)
+	}
+	if new_tweet.NumRetweets != 2500 {
+		t.Errorf("Expected %d retweets, got %d", 2500, new_tweet.NumRetweets)
+	}
+	if new_tweet.NumReplies != 3500 {
+		t.Errorf("Expected %d replies, got %d", 3500, new_tweet.NumReplies)
+	}
+	if new_tweet.NumQuoteTweets != 4500 {
+		t.Errorf("Expected %d quote tweets, got %d", 4500, new_tweet.NumQuoteTweets)
+	}
+	if new_tweet.IsStub != false {
+		t.Errorf("Expected tweet to not be a stub, but it was")
+	}
+	if new_tweet.IsContentDownloaded != true {
+		t.Errorf("Expected tweet content to be downloaded, but it wasn't")
+	}
+	if new_tweet.IsConversationScraped != true {
+		t.Errorf("Expected conversation to be scraped, but it wasn't")
+	}
+	if new_tweet.LastScrapedAt.Unix() != 2000 {
+		t.Errorf("Expected tweet to be scraped at %d (unix timestamp), but got %d", 2000, new_tweet.LastScrapedAt.Unix())
+	}
 }

 /**

|
@ -168,6 +168,8 @@ func create_stable_tweet() scraper.Tweet {
|
|||||||
Polls: []scraper.Poll{
|
Polls: []scraper.Poll{
|
||||||
create_poll_from_id(-1),
|
create_poll_from_id(-1),
|
||||||
},
|
},
|
||||||
|
IsConversationScraped: true,
|
||||||
|
LastScrapedAt: time.Unix(100000000, 0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
const ENGINE_DATABASE_VERSION = 1
|
const ENGINE_DATABASE_VERSION = 2
|
||||||
|
|
||||||
|
|
||||||
type VersionMismatchError struct {
|
type VersionMismatchError struct {
|
||||||
@@ -48,7 +48,9 @@ var MIGRATIONS = []string{
 		last_scraped_at integer not null,

 		foreign key(tweet_id) references tweets(id)
 	);`,
+	`alter table tweets add column is_conversation_scraped boolean default 0;
+	alter table tweets add column last_scraped_at integer not null default 0`,
 }

 /**

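Bumping ENGINE_DATABASE_VERSION from 1 to 2 pairs with the appended migration entry; a runner would apply every entry past the profile's stored version. Everything in the sketch below other than MIGRATIONS and ENGINE_DATABASE_VERSION is an assumption — the function name and the version bookkeeping table in particular:

// Hypothetical migration runner (imports "database/sql").
func upgradeFromVersion(db *sql.DB, version int) error {
	for _, migration := range MIGRATIONS[version:] {
		// Note: the new entry holds two `alter table` statements separated
		// by a semicolon, so the driver's Exec must accept multi-statement
		// strings (go-sqlite3 does).
		if _, err := db.Exec(migration); err != nil {
			return err
		}
	}
	// Record the new version; the bookkeeping table name is an assumption.
	_, err := db.Exec("update database_version set version = ?", ENGINE_DATABASE_VERSION)
	return err
}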
|
@ -37,6 +37,8 @@ type Tweet struct {
|
|||||||
IsStub bool
|
IsStub bool
|
||||||
|
|
||||||
IsContentDownloaded bool
|
IsContentDownloaded bool
|
||||||
|
IsConversationScraped bool
|
||||||
|
LastScrapedAt time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||