Create new Timestamp helper type

Alessio 2022-03-06 18:09:43 -08:00
parent 7edc8ad5d3
commit 26ab9ec937
14 changed files with 253 additions and 247 deletions

View File

@@ -1,8 +1,6 @@
 package persistence
 import (
-    "time"
    "offline_twitter/scraper"
 )
@@ -83,9 +81,9 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
            last_scraped_at=?
        `,
        poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3,
-        poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(),
-        poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(),
+        poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt, poll.LastUpdatedAt,
+        poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt,
    )
    return err
 }
@@ -199,17 +197,13 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
        return
    }
    var poll scraper.Poll
-    var voting_ends_at int
-    var last_scraped_at int
    for rows.Next() {
        err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3,
-            &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
+            &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &poll.VotingEndsAt, &poll.LastUpdatedAt)
        if err != nil {
            return
        }
        poll.TweetID = t.ID
-        poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0)
-        poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0)
        polls = append(polls, poll)
    }
    return
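This same mechanical change repeats across the persistence files below: because the new Timestamp type (added in scraper/timestamp_type.go, later in this commit) implements driver.Valuer and sql.Scanner, call sites can pass a Timestamp directly to Exec and scan directly into one, with no .Unix() conversions or temporary int variables. A minimal sketch of the round trip, assuming the mattn/go-sqlite3 driver and an illustrative one-column table (not the project's real schema):

package main

import (
    "database/sql"
    "fmt"

    _ "github.com/mattn/go-sqlite3" // assumed driver; any database/sql driver storing int64 works

    "offline_twitter/scraper"
)

func main() {
    db, err := sql.Open("sqlite3", ":memory:")
    if err != nil {
        panic(err)
    }
    defer db.Close()

    // Illustrative table, not the project's real schema
    if _, err = db.Exec("create table polls (voting_ends_at integer not null)"); err != nil {
        panic(err)
    }

    // database/sql calls Timestamp.Value() here, so no explicit .Unix() is needed
    ends_at := scraper.TimestampFromUnix(10000000)
    if _, err = db.Exec("insert into polls values (?)", ends_at); err != nil {
        panic(err)
    }

    // ...and Timestamp.Scan() here, replacing the old int temporary + time.Unix() conversion
    var round_tripped scraper.Timestamp
    if err = db.QueryRow("select voting_ends_at from polls").Scan(&round_tripped); err != nil {
        panic(err)
    }
    fmt.Println(round_tripped.Equal(ends_at.Time)) // true
}

Any driver that stores the int64 returned by Value() behaves the same way; Scan() only requires that the column come back out as an int64.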

View File

@@ -1,8 +1,6 @@
 package persistence
 import (
-    "time"
    "offline_twitter/scraper"
 )
@@ -35,14 +33,11 @@ func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
    defer stmt.Close()
    var r scraper.Retweet
-    var retweeted_at int
    row := stmt.QueryRow(id)
-    err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &retweeted_at)
+    err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &r.RetweetedAt)
    if err != nil {
        return scraper.Retweet{}, err
    }
-    r.RetweetedAt = time.Unix(int64(retweeted_at), 0)
    return r, nil
 }

View File

@@ -3,7 +3,6 @@ package persistence
 import (
    "database/sql"
    "strings"
-    "time"
    "offline_twitter/scraper"
 )
@@ -30,12 +29,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
            is_conversation_scraped=(is_conversation_scraped or ?),
            last_scraped_at=max(last_scraped_at, ?)
        `,
-        t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
+        t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
        t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
-        strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
+        strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
        t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
-        t.LastScrapedAt.Unix(),
+        t.LastScrapedAt,
    )
    if err != nil {
@@ -111,23 +110,18 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
    defer stmt.Close()
    var t scraper.Tweet
-    var postedAt int
-    var last_scraped_at int
    var mentions string
    var reply_mentions string
    var hashtags string
    row := stmt.QueryRow(id)
-    err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
+    err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
        &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
-        &t.IsConversationScraped, &last_scraped_at)
+        &t.IsConversationScraped, &t.LastScrapedAt)
    if err != nil {
        return t, err
    }
-    t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
-    t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)
    t.Mentions = []scraper.UserHandle{}
    for _, m := range strings.Split(mentions, ",") {
        if m != "" {

View File

@@ -2,12 +2,13 @@ package persistence_test
 import (
    "testing"
-    "time"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
    "github.com/go-test/deep"
+    "offline_twitter/scraper"
 )
 /**
@@ -74,7 +75,7 @@ func TestNoWorseningTweet(t *testing.T) {
    tweet.IsContentDownloaded = true
    tweet.IsStub = false
    tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = time.Unix(1000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
    // Save the tweet
    err := profile.SaveTweet(tweet)
@@ -84,7 +85,7 @@ func TestNoWorseningTweet(t *testing.T) {
    tweet.IsContentDownloaded = false
    tweet.IsStub = true
    tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = time.Unix(500, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
    err = profile.SaveTweet(tweet)
    require.NoError(err)
@@ -113,7 +114,7 @@ func TestModifyTweet(t *testing.T) {
    tweet.IsStub = true
    tweet.IsContentDownloaded = false
    tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = time.Unix(1000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
    err := profile.SaveTweet(tweet)
    require.NoError(err)
@@ -125,7 +126,7 @@ func TestModifyTweet(t *testing.T) {
    tweet.IsStub = false
    tweet.IsContentDownloaded = true
    tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = time.Unix(2000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
    err = profile.SaveTweet(tweet)
    require.NoError(err)

View File

@@ -1,10 +1,9 @@
 package persistence
 import (
    "fmt"
    "database/sql"
-    "time"
    "offline_twitter/scraper"
 )
 /**
@@ -51,21 +50,20 @@ func (p Profile) SaveUser(u *scraper.User) error {
            pinned_tweet_id=?,
            is_content_downloaded=(is_content_downloaded or ?)
        `,
-        u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate,
+        u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate, u.IsPrivate,
        u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID,
        u.IsContentDownloaded, u.IsIdFake,
        u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned,
        u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
    )
    if err != nil {
        return err
    }
    return nil
 }
 /**
  * Check if the database has a User with the given user handle.
  *
@@ -189,89 +187,89 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
  * why the No Worsening Principle is needed.
  */
 func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
    row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
    var is_content_downloaded bool
    var profile_image_url string
    var banner_image_url string
    err := row.Scan(&is_content_downloaded, &profile_image_url, &banner_image_url)
    if err != nil {
        if err == sql.ErrNoRows {
            return true
        } else {
            panic(err)
        }
    }
    if !is_content_downloaded {
        return true
    }
    if banner_image_url != user.BannerImageUrl {
        return true
    }
    if profile_image_url != user.ProfileImageUrl {
        return true
    }
    return false
 }
 /**
  * Follow / unfollow a user. Update the given User object's IsFollowed field.
  */
 func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
    result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
    if err != nil {
        panic(fmt.Sprintf("Error inserting user with handle %q: %s", user.Handle, err.Error()))
    }
    count, err := result.RowsAffected()
    if err != nil {
        panic("Unknown error: " + err.Error())
    }
    if count != 1 {
        panic(fmt.Sprintf("User with handle %q not found", user.Handle))
    }
    user.IsFollowed = is_followed
 }
 func (p Profile) NextFakeUserID() scraper.UserID {
    _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
    if err != nil {
        panic(err)
    }
    var ret scraper.UserID
    err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
    if err != nil {
        panic(err)
    }
    return ret
 }
 func (p Profile) GetAllFollowedUsers() []scraper.UserHandle {
    rows, err := p.DB.Query("select handle from users where is_followed = 1")
    if err != nil {
        panic(err)
    }
    ret := []scraper.UserHandle{}
    var tmp scraper.UserHandle
    for rows.Next() {
        err = rows.Scan(&tmp)
        if err != nil {
            panic(err)
        }
        ret = append(ret, tmp)
    }
    return ret
 }
 func (p Profile) IsFollowing(handle scraper.UserHandle) bool {
    for _, follow := range p.GetAllFollowedUsers() {
        if follow == handle {
-            return true;
+            return true
        }
    }
-    return false;
+    return false
 }

View File

@@ -2,19 +2,17 @@ package persistence_test
 import (
    "testing"
-    "time"
    "fmt"
    "math/rand"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
    "github.com/go-test/deep"
    "offline_twitter/scraper"
 )
 /**
  * Create a user, save it, reload it, and make sure it comes back the same
  */
@@ -62,7 +60,7 @@ func TestModifyUser(t *testing.T) {
    fake_user.IsVerified = false
    fake_user.IsBanned = false
    fake_user.FollowersCount = 1000
-    fake_user.JoinDate = time.Unix(1000, 0)
+    fake_user.JoinDate = scraper.TimestampFromUnix(1000)
    fake_user.ProfileImageUrl = "asdf"
    fake_user.IsContentDownloaded = true
@@ -70,14 +68,13 @@ func TestModifyUser(t *testing.T) {
    err := profile.SaveUser(&fake_user)
    require.NoError(err)
    fake_user.DisplayName = "Display Name 2"
    fake_user.Location = "location2"
    fake_user.IsPrivate = true
    fake_user.IsVerified = true
    fake_user.IsBanned = true
    fake_user.FollowersCount = 2000
-    fake_user.JoinDate = time.Unix(2000, 0)
+    fake_user.JoinDate = scraper.TimestampFromUnix(2000)
    fake_user.ProfileImageUrl = "asdf2"
    fake_user.IsContentDownloaded = false // test No Worsening
@@ -114,7 +111,6 @@ func TestHandleIsCaseInsensitive(t *testing.T) {
    }
 }
 /**
  * Should correctly report whether the user exists in the database
  */

View File

@@ -12,6 +12,7 @@ type ErrNotInDatabase struct {
    Table string
    Value interface{}
 }
 func (err ErrNotInDatabase) Error() string {
    return fmt.Sprintf("Not in database: %s %q", err.Table, err.Value)
 }

View File

@@ -1,12 +1,12 @@
 package persistence_test
 import (
-    "time"
    "fmt"
    "math/rand"
+    "time"
+    "offline_twitter/scraper"
    "offline_twitter/persistence"
-    "offline_twitter/scraper"
 )
 /**
@@ -40,28 +40,27 @@ func create_or_load_profile(profile_path string) persistence.Profile {
    return profile
 }
 /**
  * Create a stable user with a fixed ID and handle
  */
 func create_stable_user() scraper.User {
    return scraper.User{
        ID: scraper.UserID(-1),
        DisplayName: "stable display name",
        Handle: scraper.UserHandle("handle stable"),
        Bio: "stable bio",
        FollowersCount: 10,
        FollowingCount: 2000,
        Location: "stable location",
-        Website:"stable website",
-        JoinDate: time.Unix(10000000, 0),
+        Website: "stable website",
+        JoinDate: scraper.TimestampFromUnix(10000000),
        IsVerified: true,
        IsPrivate: false,
        ProfileImageUrl: "stable profile image url",
        ProfileImageLocalPath: "stable profile image local path",
        BannerImageUrl: "stable banner image url",
        BannerImageLocalPath: "stable image local path",
        PinnedTweetID: scraper.TweetID(345),
    }
 }
@@ -107,58 +106,56 @@ func create_video_from_id(id int) scraper.Video {
  */
 func create_url_from_id(id int) scraper.Url {
    s := fmt.Sprint(id)
-    return scraper.Url {
+    return scraper.Url{
        TweetID: -1,
        Domain: s + "domain",
        Text: s + "text",
        ShortText: s + "shorttext",
        Title: s + "title",
        Description: s + "description",
        ThumbnailWidth: id * 23,
        ThumbnailHeight: id * 7,
        ThumbnailRemoteUrl: s + "remote url",
        ThumbnailLocalPath: s + "local path",
        CreatorID: scraper.UserID(id),
        SiteID: scraper.UserID(id),
        HasCard: true,
        IsContentDownloaded: false,
    }
 }
 /**
  * Create a semi-stable Poll based on the given ID
  */
 func create_poll_from_id(id int) scraper.Poll {
    s := fmt.Sprint(id)
    return scraper.Poll{
        ID: scraper.PollID(id),
        TweetID: -1,
        NumChoices: 2,
        Choice1: s,
        Choice1_Votes: 1000,
        Choice2: "Not " + s,
        Choice2_Votes: 1500,
        VotingDuration: 10,
-        VotingEndsAt: time.Unix(10000000, 0),
-        LastUpdatedAt: time.Unix(10000, 0),
+        VotingEndsAt: scraper.TimestampFromUnix(10000000),
+        LastUpdatedAt: scraper.TimestampFromUnix(10000),
    }
 }
 /**
  * Create a stable tweet with a fixed ID and content
  */
 func create_stable_tweet() scraper.Tweet {
    tweet_id := scraper.TweetID(-1)
    return scraper.Tweet{
        ID: tweet_id,
        UserID: -1,
        Text: "stable text",
-        PostedAt: time.Unix(10000000, 0),
+        PostedAt: scraper.TimestampFromUnix(10000000),
        NumLikes: 10,
        NumRetweets: 10,
        NumReplies: 10,
        NumQuoteTweets: 10,
        Videos: []scraper.Video{
            create_video_from_id(-1),
@@ -175,7 +172,7 @@ func create_stable_tweet() scraper.Tweet {
            create_poll_from_id(-1),
        },
        IsConversationScraped: true,
-        LastScrapedAt: time.Unix(100000000, 0),
+        LastScrapedAt: scraper.TimestampFromUnix(100000000),
    }
 }
@@ -185,10 +182,10 @@ func create_stable_tweet() scraper.Tweet {
 func create_stable_retweet() scraper.Retweet {
    retweet_id := scraper.TweetID(-1)
    return scraper.Retweet{
        RetweetID: retweet_id,
        TweetID: -1,
        RetweetedByID: -1,
-        RetweetedAt: time.Unix(20000000, 0),
+        RetweetedAt: scraper.TimestampFromUnix(20000000),
    }
 }
@@ -200,26 +197,25 @@ func create_dummy_user() scraper.User {
    userID := rand.Int()
    return scraper.User{
        ID: scraper.UserID(userID),
        DisplayName: "display name",
        Handle: scraper.UserHandle(fmt.Sprintf("handle%d", userID)),
        Bio: "bio",
        FollowersCount: 0,
        FollowingCount: 1000,
        Location: "location",
-        Website:"website",
-        JoinDate: time.Now().Truncate(1e9), // Round to nearest second
+        Website: "website",
+        JoinDate: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
        IsVerified: false,
        IsPrivate: true,
        ProfileImageUrl: "profile image url",
        ProfileImageLocalPath: "profile image local path",
        BannerImageUrl: "banner image url",
        BannerImageLocalPath: "banner image local path",
        PinnedTweetID: scraper.TweetID(234),
    }
 }
 /**
  * Create a new tweet with a random ID and content
  */
@@ -243,21 +239,21 @@ func create_dummy_tweet() scraper.Tweet {
    poll.TweetID = tweet_id
    return scraper.Tweet{
        ID: tweet_id,
        UserID: -1,
        Text: "text",
-        PostedAt: time.Now().Truncate(1e9), // Round to nearest second
+        PostedAt: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
        NumLikes: 1,
        NumRetweets: 2,
        NumReplies: 3,
        NumQuoteTweets: 4,
        Videos: []scraper.Video{vid},
        Urls: []scraper.Url{url1, url2},
        Images: []scraper.Image{img1, img2},
        Mentions: []scraper.UserHandle{"mention1", "mention2"},
        ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
        Hashtags: []string{"hash1", "hash2"},
        Polls: []scraper.Poll{poll},
    }
 }
@@ -287,9 +283,9 @@ func create_dummy_retweet(tweet_id scraper.TweetID) scraper.Retweet {
    retweet_id := scraper.TweetID(rand.Int())
    return scraper.Retweet{
        RetweetID: retweet_id,
        TweetID: tweet_id,
        RetweetedByID: -1,
-        RetweetedAt: time.Unix(20000000, 0),
+        RetweetedAt: scraper.TimestampFromUnix(20000000),
    }
 }

View File

@@ -84,21 +84,18 @@ func (card APIV2Card) ParseAsPoll() Poll {
    }
    id := int_or_panic(card_url.Hostname())
-    voting_ends_at, err := time.Parse(time.RFC3339, values["end_datetime_utc"].StringValue)
-    if err != nil {
-        panic(err)
-    }
-    last_updated_at, err := time.Parse(time.RFC3339, values["last_updated_datetime_utc"].StringValue)
-    if err != nil {
-        panic(err)
-    }
    ret := Poll{}
    ret.ID = PollID(id)
    ret.NumChoices = parse_num_choices(card.Legacy.Name)
    ret.VotingDuration = int_or_panic(values["duration_minutes"].StringValue) * 60
-    ret.VotingEndsAt = voting_ends_at
-    ret.LastUpdatedAt = last_updated_at
+    ret.VotingEndsAt, err = TimestampFromString(values["end_datetime_utc"].StringValue)
+    if err != nil {
+        panic(err)
+    }
+    ret.LastUpdatedAt, err = TimestampFromString(values["last_updated_datetime_utc"].StringValue)
+    if err != nil {
+        panic(err)
+    }
    ret.Choice1 = values["choice1_label"].StringValue
    ret.Choice1_Votes = int_or_panic(values["choice1_count"].StringValue)
@@ -262,7 +259,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
        retweet.RetweetID = TweetID(api_v2_tweet.ID)
        retweet.TweetID = TweetID(api_v2_tweet.RetweetedStatusResult.Result.ID)
        retweet.RetweetedByID = UserID(api_v2_tweet.APITweet.UserID)
-        retweet.RetweetedAt, err = time.Parse(time.RubyDate, api_v2_tweet.APITweet.CreatedAt)
+        retweet.RetweetedAt, err = TimestampFromString(api_v2_tweet.APITweet.CreatedAt)
        if err != nil {
            fmt.Printf("%v\n", api_v2_tweet)
            panic(err)

View File

@@ -1,7 +1,6 @@
 package scraper
 import (
-    "time"
    "strings"
    "strconv"
    "net/url"
@@ -24,9 +23,9 @@ type Poll struct {
    Choice4_Votes int
    VotingDuration int // In seconds
-    VotingEndsAt time.Time
-    LastUpdatedAt time.Time
+    VotingEndsAt Timestamp
+    LastUpdatedAt Timestamp
 }
 func ParseAPIPoll(apiCard APICard) Poll {
@@ -36,21 +35,18 @@ func ParseAPIPoll(apiCard APICard) Poll {
    }
    id := int_or_panic(card_url.Hostname())
-    voting_ends_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.EndDatetimeUTC.StringValue)
-    if err != nil {
-        panic(err)
-    }
-    last_updated_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.LastUpdatedAt.StringValue)
-    if err != nil {
-        panic(err)
-    }
    ret := Poll{}
    ret.ID = PollID(id)
    ret.NumChoices = parse_num_choices(apiCard.Name)
    ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
-    ret.VotingEndsAt = voting_ends_at
-    ret.LastUpdatedAt = last_updated_at
+    ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
+    if err != nil {
+        panic(err)
+    }
+    ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
+    if err != nil {
+        panic(err)
+    }
    ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
    ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)

View File

@@ -1,16 +1,12 @@
 package scraper
-import (
-    "time"
-)
 type Retweet struct {
    RetweetID TweetID
    TweetID TweetID
    Tweet *Tweet
    RetweetedByID UserID
    RetweetedBy *User
-    RetweetedAt time.Time
+    RetweetedAt Timestamp
 }
 func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
@@ -19,6 +15,9 @@ func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
    ret.RetweetID = TweetID(apiTweet.ID)
    ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
    ret.RetweetedByID = UserID(apiTweet.UserID)
-    ret.RetweetedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+    ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
+    if err != nil {
+        panic(err)
+    }
    return
 }

scraper/timestamp_type.go (new file, 40 lines)
View File

@@ -0,0 +1,40 @@
+package scraper
+import (
+    "time"
+    "fmt"
+    "database/sql/driver"
+)
+type Timestamp struct {
+    time.Time
+}
+func (t Timestamp) Value() (driver.Value, error) {
+    return t.Unix(), nil
+}
+func (t *Timestamp) Scan(src interface{}) error {
+    val, is_ok := src.(int64)
+    if !is_ok {
+        return fmt.Errorf("Incompatible type for Timestamp: %#v", src)
+    }
+    *t = Timestamp{time.Unix(val, 0)}
+    return nil
+}
+func TimestampFromString(s string) (Timestamp, error) {
+    tmp, err := time.Parse(time.RubyDate, s)
+    if err == nil {
+        return Timestamp{tmp}, nil
+    }
+    tmp, err = time.Parse(time.RFC3339, s)
+    if err == nil {
+        return Timestamp{tmp}, nil
+    }
+    return Timestamp{}, err
+}
+func TimestampFromUnix(num int64) Timestamp {
+    return Timestamp{time.Unix(num, 0)}
+}
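TimestampFromString tries both timestamp layouts that Twitter's responses use: time.RubyDate (the created_at fields on tweets and users) and RFC 3339 (the poll-card binding values such as end_datetime_utc). A small usage sketch, with arbitrary example dates:

package main

import (
    "fmt"

    "offline_twitter/scraper"
)

func main() {
    // Ruby date layout, as in tweet and user "created_at" fields
    t1, err := scraper.TimestampFromString("Mon Mar 07 10:00:00 +0000 2022")
    if err != nil {
        panic(err)
    }

    // RFC 3339 layout, as in poll card values like "end_datetime_utc"
    t2, err := scraper.TimestampFromString("2022-03-07T10:00:00Z")
    if err != nil {
        panic(err)
    }

    fmt.Println(t1.Equal(t2.Time)) // true: same instant, parsed via different layouts

    // Strings in neither layout produce an error
    _, err = scraper.TimestampFromString("not a timestamp")
    fmt.Println(err != nil) // true
}

One caveat visible in the implementation: when neither layout matches, the error returned comes from the second (RFC 3339) attempt, which can be misleading if the input was a near-miss Ruby-format date.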

View File

@@ -18,7 +18,7 @@ type Tweet struct {
    UserHandle UserHandle // For processing tombstones
    User *User
    Text string
-    PostedAt time.Time
+    PostedAt Timestamp
    NumLikes int
    NumRetweets int
    NumReplies int
@@ -39,7 +39,7 @@ type Tweet struct {
    IsContentDownloaded bool
    IsConversationScraped bool
-    LastScrapedAt time.Time
+    LastScrapedAt Timestamp
 }
@@ -58,7 +58,7 @@ func (t Tweet) String() string {
 Replies: %d RT: %d QT: %d Likes: %d
 `,
        author,
-        terminal_utils.FormatDate(t.PostedAt),
+        terminal_utils.FormatDate(t.PostedAt.Time),
        terminal_utils.WrapText(t.Text, 60),
        t.NumReplies,
        t.NumRetweets,
@@ -91,7 +91,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
    // Process "posted-at" date and time
    if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
-        ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+        ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
        if err != nil {
            return
        }
@@ -175,7 +175,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
    // Process tombstones and other metadata
    ret.TombstoneType = apiTweet.TombstoneText
    ret.IsStub = !(ret.TombstoneType == "")
-    ret.LastScrapedAt = time.Unix(0, 0) // Caller will change this for the tweet that was actually scraped
+    ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
    ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
    return
@@ -251,7 +251,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
    if !ok {
        panic("Trove didn't contain its own tweet!")
    }
-    tweet.LastScrapedAt = time.Now()
+    tweet.LastScrapedAt = Timestamp{time.Now()}
    tweet.IsConversationScraped = true
    trove.Tweets[id] = tweet
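Because Timestamp embeds time.Time, all of time.Time's methods are promoted, which is why expressions like t.PostedAt.Unix() elsewhere in the codebase keep compiling unchanged. What embedding does not provide is implicit conversion: a parameter declared as plain time.Time, like terminal_utils.FormatDate's, needs the embedded field named explicitly, hence the .Time added at these call sites. A short illustration (format_date is a hypothetical stand-in for such a function):

package main

import (
    "fmt"
    "time"

    "offline_twitter/scraper"
)

// Stand-in for a function like terminal_utils.FormatDate that takes a plain time.Time
func format_date(t time.Time) string {
    return t.Format("Jan 2, 2006")
}

func main() {
    ts := scraper.Timestamp{time.Now()}

    // Methods are promoted from the embedded time.Time, so these compile as before:
    fmt.Println(ts.Unix())
    fmt.Println(ts.Format(time.RFC3339))

    // But Timestamp is not implicitly convertible to time.Time; a plain
    // time.Time parameter needs the embedded field spelled out:
    fmt.Println(format_date(ts.Time))
}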

View File

@@ -1,7 +1,6 @@
 package scraper
 import (
-    "time"
    "fmt"
    "strings"
    "regexp"
@@ -32,7 +31,7 @@ type User struct {
    FollowersCount int
    Location string
    Website string
-    JoinDate time.Time
+    JoinDate Timestamp
    IsPrivate bool
    IsVerified bool
    IsBanned bool
@@ -72,7 +71,7 @@ Joined %s
        terminal_utils.WrapText(u.Bio, 60),
        u.FollowingCount,
        u.FollowersCount,
-        terminal_utils.FormatDate(u.JoinDate),
+        terminal_utils.FormatDate(u.JoinDate.Time),
        u.Location,
        u.Website,
    )
@@ -100,7 +99,7 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
        FollowingCount: 0,
        Location: "<blank>",
        Website:"<blank>",
-        JoinDate: time.Unix(0, 0),
+        JoinDate: TimestampFromUnix(0),
        IsVerified: false,
        IsPrivate: false,
        IsNeedingFakeID: true,
@@ -133,7 +132,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
    if len(apiUser.Entities.URL.Urls) > 0 {
        ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
    }
-    ret.JoinDate, err = time.Parse(time.RubyDate, apiUser.CreatedAt)
+    ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
    if err != nil {
        return
    }