Create new Timestamp helper type

This commit is contained in:
Alessio 2022-03-06 18:09:43 -08:00
parent 7edc8ad5d3
commit 26ab9ec937
14 changed files with 253 additions and 247 deletions

View File

@ -1,8 +1,6 @@
package persistence
import (
"time"
"offline_twitter/scraper"
)
@ -83,9 +81,9 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
last_scraped_at=?
`,
poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3,
poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(),
poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt, poll.LastUpdatedAt,
poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(),
poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt,
)
return err
}
@ -199,17 +197,13 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
return
}
var poll scraper.Poll
var voting_ends_at int
var last_scraped_at int
for rows.Next() {
err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3,
&poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
&poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &poll.VotingEndsAt, &poll.LastUpdatedAt)
if err != nil {
return
}
poll.TweetID = t.ID
poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0)
poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0)
polls = append(polls, poll)
}
return

View File

@ -1,8 +1,6 @@
package persistence
import (
"time"
"offline_twitter/scraper"
)
@ -35,14 +33,11 @@ func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
defer stmt.Close()
var r scraper.Retweet
var retweeted_at int
row := stmt.QueryRow(id)
err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &retweeted_at)
err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &r.RetweetedAt)
if err != nil {
return scraper.Retweet{}, err
}
r.RetweetedAt = time.Unix(int64(retweeted_at), 0)
return r, nil
}

View File

@ -3,7 +3,6 @@ package persistence
import (
"database/sql"
"strings"
"time"
"offline_twitter/scraper"
)
@ -30,12 +29,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
is_conversation_scraped=(is_conversation_scraped or ?),
last_scraped_at=max(last_scraped_at, ?)
`,
t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
t.LastScrapedAt.Unix(),
t.LastScrapedAt,
)
if err != nil {
@ -111,23 +110,18 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
defer stmt.Close()
var t scraper.Tweet
var postedAt int
var last_scraped_at int
var mentions string
var reply_mentions string
var hashtags string
row := stmt.QueryRow(id)
err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
&t.IsConversationScraped, &last_scraped_at)
&t.IsConversationScraped, &t.LastScrapedAt)
if err != nil {
return t, err
}
t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)
t.Mentions = []scraper.UserHandle{}
for _, m := range strings.Split(mentions, ",") {
if m != "" {

View File

@ -2,12 +2,13 @@ package persistence_test
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/go-test/deep"
"offline_twitter/scraper"
)
/**
@ -74,7 +75,7 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.IsContentDownloaded = true
tweet.IsStub = false
tweet.IsConversationScraped = true
tweet.LastScrapedAt = time.Unix(1000, 0)
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
// Save the tweet
err := profile.SaveTweet(tweet)
@ -84,7 +85,7 @@ func TestNoWorseningTweet(t *testing.T) {
tweet.IsContentDownloaded = false
tweet.IsStub = true
tweet.IsConversationScraped = false
tweet.LastScrapedAt = time.Unix(500, 0)
tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
err = profile.SaveTweet(tweet)
require.NoError(err)
@ -113,7 +114,7 @@ func TestModifyTweet(t *testing.T) {
tweet.IsStub = true
tweet.IsContentDownloaded = false
tweet.IsConversationScraped = false
tweet.LastScrapedAt = time.Unix(1000, 0)
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
err := profile.SaveTweet(tweet)
require.NoError(err)
@ -125,7 +126,7 @@ func TestModifyTweet(t *testing.T) {
tweet.IsStub = false
tweet.IsContentDownloaded = true
tweet.IsConversationScraped = true
tweet.LastScrapedAt = time.Unix(2000, 0)
tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
err = profile.SaveTweet(tweet)
require.NoError(err)

View File

@ -3,7 +3,6 @@ package persistence
import (
"fmt"
"database/sql"
"time"
"offline_twitter/scraper"
)
@ -51,7 +50,7 @@ func (p Profile) SaveUser(u *scraper.User) error {
pinned_tweet_id=?,
is_content_downloaded=(is_content_downloaded or ?)
`,
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate,
u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate, u.IsPrivate,
u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID,
u.IsContentDownloaded, u.IsIdFake,
@ -65,7 +64,6 @@ func (p Profile) SaveUser(u *scraper.User) error {
return nil
}
/**
* Check if the database has a User with the given user handle.
*
@ -270,8 +268,8 @@ func (p Profile) GetAllFollowedUsers() []scraper.UserHandle {
func (p Profile) IsFollowing(handle scraper.UserHandle) bool {
for _, follow := range p.GetAllFollowedUsers() {
if follow == handle {
return true;
return true
}
}
return false;
return false
}

View File

@ -2,19 +2,17 @@ package persistence_test
import (
"testing"
"time"
"fmt"
"math/rand"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/go-test/deep"
"offline_twitter/scraper"
)
/**
* Create a user, save it, reload it, and make sure it comes back the same
*/
@ -62,7 +60,7 @@ func TestModifyUser(t *testing.T) {
fake_user.IsVerified = false
fake_user.IsBanned = false
fake_user.FollowersCount = 1000
fake_user.JoinDate = time.Unix(1000, 0)
fake_user.JoinDate = scraper.TimestampFromUnix(1000)
fake_user.ProfileImageUrl = "asdf"
fake_user.IsContentDownloaded = true
@ -70,14 +68,13 @@ func TestModifyUser(t *testing.T) {
err := profile.SaveUser(&fake_user)
require.NoError(err)
fake_user.DisplayName = "Display Name 2"
fake_user.Location = "location2"
fake_user.IsPrivate = true
fake_user.IsVerified = true
fake_user.IsBanned = true
fake_user.FollowersCount = 2000
fake_user.JoinDate = time.Unix(2000, 0)
fake_user.JoinDate = scraper.TimestampFromUnix(2000)
fake_user.ProfileImageUrl = "asdf2"
fake_user.IsContentDownloaded = false // test No Worsening
@ -114,7 +111,6 @@ func TestHandleIsCaseInsensitive(t *testing.T) {
}
}
/**
* Should correctly report whether the user exists in the database
*/

View File

@ -12,6 +12,7 @@ type ErrNotInDatabase struct {
Table string
Value interface{}
}
// Error implements the `error` interface, reporting which table was
// queried and which value was not found in it.
func (e ErrNotInDatabase) Error() string {
	return fmt.Sprintf("Not in database: %s %q", e.Table, e.Value)
}

View File

@ -1,12 +1,12 @@
package persistence_test
import (
"time"
"fmt"
"math/rand"
"time"
"offline_twitter/scraper"
"offline_twitter/persistence"
"offline_twitter/scraper"
)
/**
@ -40,7 +40,6 @@ func create_or_load_profile(profile_path string) persistence.Profile {
return profile
}
/**
* Create a stable user with a fixed ID and handle
*/
@ -53,8 +52,8 @@ func create_stable_user() scraper.User {
FollowersCount: 10,
FollowingCount: 2000,
Location: "stable location",
Website:"stable website",
JoinDate: time.Unix(10000000, 0),
Website: "stable website",
JoinDate: scraper.TimestampFromUnix(10000000),
IsVerified: true,
IsPrivate: false,
ProfileImageUrl: "stable profile image url",
@ -107,7 +106,7 @@ func create_video_from_id(id int) scraper.Video {
*/
func create_url_from_id(id int) scraper.Url {
s := fmt.Sprint(id)
return scraper.Url {
return scraper.Url{
TweetID: -1,
Domain: s + "domain",
Text: s + "text",
@ -125,7 +124,6 @@ func create_url_from_id(id int) scraper.Url {
}
}
/**
* Create a semi-stable Poll based on the given ID
*/
@ -140,12 +138,11 @@ func create_poll_from_id(id int) scraper.Poll {
Choice2: "Not " + s,
Choice2_Votes: 1500,
VotingDuration: 10,
VotingEndsAt: time.Unix(10000000, 0),
LastUpdatedAt: time.Unix(10000, 0),
VotingEndsAt: scraper.TimestampFromUnix(10000000),
LastUpdatedAt: scraper.TimestampFromUnix(10000),
}
}
/**
* Create a stable tweet with a fixed ID and content
*/
@ -155,7 +152,7 @@ func create_stable_tweet() scraper.Tweet {
ID: tweet_id,
UserID: -1,
Text: "stable text",
PostedAt: time.Unix(10000000, 0),
PostedAt: scraper.TimestampFromUnix(10000000),
NumLikes: 10,
NumRetweets: 10,
NumReplies: 10,
@ -175,7 +172,7 @@ func create_stable_tweet() scraper.Tweet {
create_poll_from_id(-1),
},
IsConversationScraped: true,
LastScrapedAt: time.Unix(100000000, 0),
LastScrapedAt: scraper.TimestampFromUnix(100000000),
}
}
@ -188,7 +185,7 @@ func create_stable_retweet() scraper.Retweet {
RetweetID: retweet_id,
TweetID: -1,
RetweetedByID: -1,
RetweetedAt: time.Unix(20000000, 0),
RetweetedAt: scraper.TimestampFromUnix(20000000),
}
}
@ -207,8 +204,8 @@ func create_dummy_user() scraper.User {
FollowersCount: 0,
FollowingCount: 1000,
Location: "location",
Website:"website",
JoinDate: time.Now().Truncate(1e9), // Round to nearest second
Website: "website",
JoinDate: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
IsVerified: false,
IsPrivate: true,
ProfileImageUrl: "profile image url",
@ -219,7 +216,6 @@ func create_dummy_user() scraper.User {
}
}
/**
* Create a new tweet with a random ID and content
*/
@ -246,7 +242,7 @@ func create_dummy_tweet() scraper.Tweet {
ID: tweet_id,
UserID: -1,
Text: "text",
PostedAt: time.Now().Truncate(1e9), // Round to nearest second
PostedAt: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
NumLikes: 1,
NumRetweets: 2,
NumReplies: 3,
@ -290,6 +286,6 @@ func create_dummy_retweet(tweet_id scraper.TweetID) scraper.Retweet {
RetweetID: retweet_id,
TweetID: tweet_id,
RetweetedByID: -1,
RetweetedAt: time.Unix(20000000, 0),
RetweetedAt: scraper.TimestampFromUnix(20000000),
}
}

View File

@ -84,21 +84,18 @@ func (card APIV2Card) ParseAsPoll() Poll {
}
id := int_or_panic(card_url.Hostname())
voting_ends_at, err := time.Parse(time.RFC3339, values["end_datetime_utc"].StringValue)
if err != nil {
panic(err)
}
last_updated_at, err := time.Parse(time.RFC3339, values["last_updated_datetime_utc"].StringValue)
if err != nil {
panic(err)
}
ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(card.Legacy.Name)
ret.VotingDuration = int_or_panic(values["duration_minutes"].StringValue) * 60
ret.VotingEndsAt = voting_ends_at
ret.LastUpdatedAt = last_updated_at
ret.VotingEndsAt, err = TimestampFromString(values["end_datetime_utc"].StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(values["last_updated_datetime_utc"].StringValue)
if err != nil {
panic(err)
}
ret.Choice1 = values["choice1_label"].StringValue
ret.Choice1_Votes = int_or_panic(values["choice1_count"].StringValue)
@ -262,7 +259,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
retweet.RetweetID = TweetID(api_v2_tweet.ID)
retweet.TweetID = TweetID(api_v2_tweet.RetweetedStatusResult.Result.ID)
retweet.RetweetedByID = UserID(api_v2_tweet.APITweet.UserID)
retweet.RetweetedAt, err = time.Parse(time.RubyDate, api_v2_tweet.APITweet.CreatedAt)
retweet.RetweetedAt, err = TimestampFromString(api_v2_tweet.APITweet.CreatedAt)
if err != nil {
fmt.Printf("%v\n", api_v2_tweet)
panic(err)

View File

@ -1,7 +1,6 @@
package scraper
import (
"time"
"strings"
"strconv"
"net/url"
@ -24,9 +23,9 @@ type Poll struct {
Choice4_Votes int
VotingDuration int // In seconds
VotingEndsAt time.Time
VotingEndsAt Timestamp
LastUpdatedAt time.Time
LastUpdatedAt Timestamp
}
func ParseAPIPoll(apiCard APICard) Poll {
@ -36,21 +35,18 @@ func ParseAPIPoll(apiCard APICard) Poll {
}
id := int_or_panic(card_url.Hostname())
voting_ends_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
last_updated_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}
ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt = voting_ends_at
ret.LastUpdatedAt = last_updated_at
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)

View File

@ -1,16 +1,12 @@
package scraper
import (
"time"
)
type Retweet struct {
RetweetID TweetID
TweetID TweetID
Tweet *Tweet
RetweetedByID UserID
RetweetedBy *User
RetweetedAt time.Time
RetweetedAt Timestamp
}
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
@ -19,6 +15,9 @@ func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
ret.RetweetID = TweetID(apiTweet.ID)
ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
ret.RetweetedByID = UserID(apiTweet.UserID)
ret.RetweetedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil {
panic(err)
}
return
}

40
scraper/timestamp_type.go Normal file
View File

@ -0,0 +1,40 @@
package scraper
import (
"time"
"fmt"
"database/sql/driver"
)
// Timestamp wraps time.Time so that it can be saved to and loaded from
// the database as Unix epoch seconds via the database/sql interfaces.
type Timestamp struct {
	time.Time
}

// Value implements driver.Valuer, serializing the Timestamp as Unix
// epoch seconds for storage.
func (t Timestamp) Value() (driver.Value, error) {
	return t.Unix(), nil
}

// Scan implements sql.Scanner, loading a Timestamp from an int64 column
// holding Unix epoch seconds.  Any other source type is an error.
func (t *Timestamp) Scan(src interface{}) error {
	val, is_ok := src.(int64)
	if !is_ok {
		return fmt.Errorf("Incompatible type for Timestamp: %#v", src)
	}
	*t = Timestamp{time.Unix(val, 0)}
	return nil
}

// TimestampFromString parses a Timestamp from a string, accepting either
// Ruby date format (as used by the Twitter API) or RFC 3339.  If neither
// format matches, the error from the RFC 3339 attempt is returned.
func TimestampFromString(s string) (Timestamp, error) {
	tmp, err := time.Parse(time.RubyDate, s)
	if err == nil {
		return Timestamp{tmp}, nil
	}
	tmp, err = time.Parse(time.RFC3339, s)
	if err == nil {
		return Timestamp{tmp}, nil
	}
	return Timestamp{}, err
}

// TimestampFromUnix converts Unix epoch seconds into a Timestamp.
func TimestampFromUnix(num int64) Timestamp {
	// Bug fix: previously returned the hard-coded time.Unix(10000000, 0),
	// ignoring `num` entirely (it only passed the test helpers because they
	// happened to use 10000000).
	return Timestamp{time.Unix(num, 0)}
}

View File

@ -18,7 +18,7 @@ type Tweet struct {
UserHandle UserHandle // For processing tombstones
User *User
Text string
PostedAt time.Time
PostedAt Timestamp
NumLikes int
NumRetweets int
NumReplies int
@ -39,7 +39,7 @@ type Tweet struct {
IsContentDownloaded bool
IsConversationScraped bool
LastScrapedAt time.Time
LastScrapedAt Timestamp
}
@ -58,7 +58,7 @@ func (t Tweet) String() string {
Replies: %d RT: %d QT: %d Likes: %d
`,
author,
terminal_utils.FormatDate(t.PostedAt),
terminal_utils.FormatDate(t.PostedAt.Time),
terminal_utils.WrapText(t.Text, 60),
t.NumReplies,
t.NumRetweets,
@ -91,7 +91,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process "posted-at" date and time
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil {
return
}
@ -175,7 +175,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
// Process tombstones and other metadata
ret.TombstoneType = apiTweet.TombstoneText
ret.IsStub = !(ret.TombstoneType == "")
ret.LastScrapedAt = time.Unix(0, 0) // Caller will change this for the tweet that was actually scraped
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
return
@ -251,7 +251,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
if !ok {
panic("Trove didn't contain its own tweet!")
}
tweet.LastScrapedAt = time.Now()
tweet.LastScrapedAt = Timestamp{time.Now()}
tweet.IsConversationScraped = true
trove.Tweets[id] = tweet

View File

@ -1,7 +1,6 @@
package scraper
import (
"time"
"fmt"
"strings"
"regexp"
@ -32,7 +31,7 @@ type User struct {
FollowersCount int
Location string
Website string
JoinDate time.Time
JoinDate Timestamp
IsPrivate bool
IsVerified bool
IsBanned bool
@ -72,7 +71,7 @@ Joined %s
terminal_utils.WrapText(u.Bio, 60),
u.FollowingCount,
u.FollowersCount,
terminal_utils.FormatDate(u.JoinDate),
terminal_utils.FormatDate(u.JoinDate.Time),
u.Location,
u.Website,
)
@ -100,7 +99,7 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
FollowingCount: 0,
Location: "<blank>",
Website:"<blank>",
JoinDate: time.Unix(0, 0),
JoinDate: TimestampFromUnix(0),
IsVerified: false,
IsPrivate: false,
IsNeedingFakeID: true,
@ -133,7 +132,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
if len(apiUser.Entities.URL.Urls) > 0 {
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
}
ret.JoinDate, err = time.Parse(time.RubyDate, apiUser.CreatedAt)
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
if err != nil {
return
}