From 26ab9ec937b6ecd43145abad882975f2bec5b8d0 Mon Sep 17 00:00:00 2001
From: Alessio
Date: Sun, 6 Mar 2022 18:09:43 -0800
Subject: [PATCH] Create new Timestamp helper type

---
 persistence/media_queries.go      |  12 +--
 persistence/retweet_queries.go    |   7 +-
 persistence/tweet_queries.go      |  16 +--
 persistence/tweet_queries_test.go |  11 +-
 persistence/user_queries.go       | 156 ++++++++++++++------------
 persistence/user_queries_test.go  |  10 +-
 persistence/utils.go              |   1 +
 persistence/utils_test.go         | 170 +++++++++++++++---------------
 scraper/api_types_v2.go           |  21 ++--
 scraper/poll.go                   |  24 ++---
 scraper/retweet.go                |  11 +-
 scraper/timestamp_type.go         |  40 +++++++
 scraper/tweet.go                  |  12 +--
 scraper/user.go                   |   9 +-
 14 files changed, 253 insertions(+), 247 deletions(-)
 create mode 100644 scraper/timestamp_type.go
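Background for reviewers (notes placed here, after the cut line, stay out of the
commit message and are skipped when the patch is applied): the new Timestamp type
wraps time.Time and implements driver.Valuer and sql.Scanner from database/sql,
so timestamps serialize to and from Unix seconds automatically at every query
site. That is what lets all of the explicit .Unix() and time.Unix(...)
conversions below disappear. A minimal sketch of the round trip the persistence
layer now relies on (illustrative only, not code from this patch: the events
table, the epoch value, and the choice of the mattn/go-sqlite3 driver are
invented for the example):

    package main

    import (
        "database/sql"
        "fmt"
        "time"

        _ "github.com/mattn/go-sqlite3"

        "offline_twitter/scraper"
    )

    func main() {
        db, err := sql.Open("sqlite3", ":memory:")
        if err != nil {
            panic(err)
        }
        defer db.Close()

        // Hypothetical table, standing in for the real schema
        if _, err = db.Exec(`create table events (happened_at integer not null)`); err != nil {
            panic(err)
        }

        // On insert, database/sql calls Timestamp.Value(), which returns t.Unix() as an int64
        ts := scraper.TimestampFromUnix(1646619600)
        if _, err = db.Exec(`insert into events (happened_at) values (?)`, ts); err != nil {
            panic(err)
        }

        // On select, database/sql calls (*Timestamp).Scan() with the stored int64
        var got scraper.Timestamp
        if err = db.QueryRow(`select happened_at from events`).Scan(&got); err != nil {
            panic(err)
        }
        fmt.Println(got.Time.Equal(time.Unix(1646619600, 0))) // prints "true"
    }

One design note: Value() has a value receiver while Scan() needs a pointer
receiver, so reads must pass a pointer (&got); a plain Timestamp value does not
satisfy sql.Scanner.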
diff --git a/persistence/media_queries.go b/persistence/media_queries.go
index 2ef0fb1..5f7a965 100644
--- a/persistence/media_queries.go
+++ b/persistence/media_queries.go
@@ -1,8 +1,6 @@
 package persistence
 
 import (
-    "time"
-
     "offline_twitter/scraper"
 )
 
@@ -83,9 +81,9 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
             last_scraped_at=?
         `,
         poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3,
-        poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(),
+        poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt, poll.LastUpdatedAt,
 
-        poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(),
+        poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt,
     )
     return err
 }
@@ -199,17 +197,13 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
         return
     }
     var poll scraper.Poll
-    var voting_ends_at int
-    var last_scraped_at int
     for rows.Next() {
         err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3,
-            &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
+            &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &poll.VotingEndsAt, &poll.LastUpdatedAt)
         if err != nil {
             return
         }
         poll.TweetID = t.ID
-        poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0)
-        poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0)
         polls = append(polls, poll)
     }
     return
diff --git a/persistence/retweet_queries.go b/persistence/retweet_queries.go
index b849631..0877d3a 100644
--- a/persistence/retweet_queries.go
+++ b/persistence/retweet_queries.go
@@ -1,8 +1,6 @@
 package persistence
 
 import (
-    "time"
-
     "offline_twitter/scraper"
 )
 
@@ -35,14 +33,11 @@ func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
     defer stmt.Close()
 
     var r scraper.Retweet
-    var retweeted_at int
 
     row := stmt.QueryRow(id)
-    err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &retweeted_at)
+    err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &r.RetweetedAt)
     if err != nil {
         return scraper.Retweet{}, err
     }
-
-    r.RetweetedAt = time.Unix(int64(retweeted_at), 0)
     return r, nil
 }
diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go
index afdd721..c9b903a 100644
--- a/persistence/tweet_queries.go
+++ b/persistence/tweet_queries.go
@@ -3,7 +3,6 @@ package persistence
 import (
     "database/sql"
     "strings"
-    "time"
 
     "offline_twitter/scraper"
 )
@@ -30,12 +29,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
             is_conversation_scraped=(is_conversation_scraped or ?),
             last_scraped_at=max(last_scraped_at, ?)
         `,
-        t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
+        t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
         t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
-        strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
+        strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
 
         t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
-        t.LastScrapedAt.Unix(),
+        t.LastScrapedAt,
     )
 
     if err != nil {
@@ -111,23 +110,18 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
     defer stmt.Close()
 
     var t scraper.Tweet
-    var postedAt int
-    var last_scraped_at int
     var mentions string
     var reply_mentions string
     var hashtags string
 
     row := stmt.QueryRow(id)
-    err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
+    err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
         &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
-        &t.IsConversationScraped, &last_scraped_at)
+        &t.IsConversationScraped, &t.LastScrapedAt)
     if err != nil {
         return t, err
     }
 
-    t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
-    t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)
-
     t.Mentions = []scraper.UserHandle{}
     for _, m := range strings.Split(mentions, ",") {
         if m != "" {
diff --git a/persistence/tweet_queries_test.go b/persistence/tweet_queries_test.go
index 82e1873..114af09 100644
--- a/persistence/tweet_queries_test.go
+++ b/persistence/tweet_queries_test.go
@@ -2,12 +2,13 @@ package persistence_test
 
 import (
     "testing"
-    "time"
 
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
 
     "github.com/go-test/deep"
+
+    "offline_twitter/scraper"
 )
 
 /**
@@ -74,7 +75,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsContentDownloaded = true
     tweet.IsStub = false
     tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = time.Unix(1000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
 
     // Save the tweet
     err := profile.SaveTweet(tweet)
@@ -84,7 +85,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsContentDownloaded = false
     tweet.IsStub = true
     tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = time.Unix(500, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
 
     err = profile.SaveTweet(tweet)
     require.NoError(err)
@@ -113,7 +114,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = true
     tweet.IsContentDownloaded = false
     tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = time.Unix(1000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
 
     err := profile.SaveTweet(tweet)
     require.NoError(err)
@@ -125,7 +126,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = false
     tweet.IsContentDownloaded = true
     tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = time.Unix(2000, 0)
+    tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
 
     err = profile.SaveTweet(tweet)
     require.NoError(err)
diff --git a/persistence/user_queries.go b/persistence/user_queries.go
index 0fa0d61..0c07523 100644
--- a/persistence/user_queries.go
+++ b/persistence/user_queries.go
@@ -1,10 +1,9 @@
 package persistence
 
 import (
-    "fmt"
-    "database/sql"
-    "time"
-    "offline_twitter/scraper"
+    "fmt"
+    "database/sql"
+    "offline_twitter/scraper"
 )
 
 /**
@@ -51,21 +50,20 @@ func (p Profile) SaveUser(u *scraper.User) error {
             pinned_tweet_id=?,
             is_content_downloaded=(is_content_downloaded or ?)
         `,
-        u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate,
-        u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID,
-        u.IsContentDownloaded, u.IsIdFake,
+        u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate, u.IsPrivate,
+        u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID,
+        u.IsContentDownloaded, u.IsIdFake,
 
-        u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned,
-        u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
-    )
-    if err != nil {
-        return err
-    }
+        u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned,
+        u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
+    )
+    if err != nil {
+        return err
+    }
 
-    return nil
+    return nil
 }
-
 /**
  * Check if the database has a User with the given user handle.
  *
@@ -189,89 +187,89 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
  * why the No Worsening Principle is needed.
  */
 func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
-    row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
+    row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
 
-    var is_content_downloaded bool
-    var profile_image_url string
-    var banner_image_url string
-    err := row.Scan(&is_content_downloaded, &profile_image_url, &banner_image_url)
-    if err != nil {
-        if err == sql.ErrNoRows {
-            return true
-        } else {
-            panic(err)
-        }
-    }
+    var is_content_downloaded bool
+    var profile_image_url string
+    var banner_image_url string
+    err := row.Scan(&is_content_downloaded, &profile_image_url, &banner_image_url)
+    if err != nil {
+        if err == sql.ErrNoRows {
+            return true
+        } else {
+            panic(err)
+        }
+    }
 
-    if !is_content_downloaded {
-        return true
-    }
-    if banner_image_url != user.BannerImageUrl {
-        return true
-    }
-    if profile_image_url != user.ProfileImageUrl {
-        return true
-    }
-    return false
+    if !is_content_downloaded {
+        return true
+    }
+    if banner_image_url != user.BannerImageUrl {
+        return true
+    }
+    if profile_image_url != user.ProfileImageUrl {
+        return true
+    }
+    return false
 }
 
 /**
  * Follow / unfollow a user.  Update the given User object's IsFollowed field.
  */
 func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
-    result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
-    if err != nil {
-        panic(fmt.Sprintf("Error inserting user with handle %q: %s", user.Handle, err.Error()))
-    }
-    count, err := result.RowsAffected()
-    if err != nil {
-        panic("Unknown error: " + err.Error())
-    }
-    if count != 1 {
-        panic(fmt.Sprintf("User with handle %q not found", user.Handle))
-    }
-    user.IsFollowed = is_followed
+    result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
+    if err != nil {
+        panic(fmt.Sprintf("Error inserting user with handle %q: %s", user.Handle, err.Error()))
+    }
+    count, err := result.RowsAffected()
+    if err != nil {
+        panic("Unknown error: " + err.Error())
+    }
+    if count != 1 {
+        panic(fmt.Sprintf("User with handle %q not found", user.Handle))
+    }
+    user.IsFollowed = is_followed
 }
 
 func (p Profile) NextFakeUserID() scraper.UserID {
-    _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
-    if err != nil {
-        panic(err)
-    }
-    var ret scraper.UserID
-    err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
-    if err != nil {
-        panic(err)
-    }
-    return ret
+    _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
+    if err != nil {
+        panic(err)
+    }
+    var ret scraper.UserID
+    err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
+    if err != nil {
+        panic(err)
+    }
+    return ret
 }
 
 func (p Profile) GetAllFollowedUsers() []scraper.UserHandle {
-    rows, err := p.DB.Query("select handle from users where is_followed = 1")
-    if err != nil {
-        panic(err)
-    }
+    rows, err := p.DB.Query("select handle from users where is_followed = 1")
+    if err != nil {
+        panic(err)
+    }
 
-    ret := []scraper.UserHandle{}
+    ret := []scraper.UserHandle{}
 
-    var tmp scraper.UserHandle
+    var tmp scraper.UserHandle
 
-    for rows.Next() {
-        err = rows.Scan(&tmp)
-        if err != nil {
-            panic(err)
-        }
-        ret = append(ret, tmp)
-    }
+    for rows.Next() {
+        err = rows.Scan(&tmp)
+        if err != nil {
+            panic(err)
+        }
+        ret = append(ret, tmp)
+    }
 
-    return ret
+    return ret
 }
 
 func (p Profile) IsFollowing(handle scraper.UserHandle) bool {
-    for _, follow := range p.GetAllFollowedUsers() {
-        if follow == handle {
-            return true;
-        }
-    }
-    return false;
+    for _, follow := range p.GetAllFollowedUsers() {
+        if follow == handle {
+            return true
+        }
+    }
+    return false
 }
diff --git a/persistence/user_queries_test.go b/persistence/user_queries_test.go
index 891b6a4..ad5903a 100644
--- a/persistence/user_queries_test.go
+++ b/persistence/user_queries_test.go
@@ -2,19 +2,17 @@ package persistence_test
 
 import (
     "testing"
-    "time"
+    "fmt"
     "math/rand"
 
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
-
     "github.com/go-test/deep"
 
     "offline_twitter/scraper"
 )
 
-
 /**
  * Create a user, save it, reload it, and make sure it comes back the same
  */
@@ -62,7 +60,7 @@ func TestModifyUser(t *testing.T) {
     fake_user.IsVerified = false
     fake_user.IsBanned = false
     fake_user.FollowersCount = 1000
-    fake_user.JoinDate = time.Unix(1000, 0)
+    fake_user.JoinDate = scraper.TimestampFromUnix(1000)
     fake_user.ProfileImageUrl = "asdf"
     fake_user.IsContentDownloaded = true
 
@@ -70,14 +68,13 @@ func TestModifyUser(t *testing.T) {
     err := profile.SaveUser(&fake_user)
     require.NoError(err)
 
-
     fake_user.DisplayName = "Display Name 2"
     fake_user.Location = "location2"
     fake_user.IsPrivate = true
     fake_user.IsVerified = true
     fake_user.IsBanned = true
     fake_user.FollowersCount = 2000
-    fake_user.JoinDate = time.Unix(2000, 0)
+    fake_user.JoinDate = scraper.TimestampFromUnix(2000)
     fake_user.ProfileImageUrl = "asdf2"
     fake_user.IsContentDownloaded = false // test No Worsening
 
@@ -114,7 +111,6 @@ func TestHandleIsCaseInsensitive(t *testing.T) {
     }
 }
 
-
 /**
  * Should correctly report whether the user exists in the database
  */
diff --git a/persistence/utils.go b/persistence/utils.go
index 44c8c49..f164a56 100644
--- a/persistence/utils.go
+++ b/persistence/utils.go
@@ -12,6 +12,7 @@ type ErrNotInDatabase struct {
     Table string
     Value interface{}
 }
+
 func (err ErrNotInDatabase) Error() string {
     return fmt.Sprintf("Not in database: %s %q", err.Table, err.Value)
 }
diff --git a/persistence/utils_test.go b/persistence/utils_test.go
index 8229ed4..42e9728 100644
--- a/persistence/utils_test.go
+++ b/persistence/utils_test.go
@@ -1,12 +1,12 @@
 package persistence_test
 
 import (
-    "time"
     "fmt"
     "math/rand"
+    "time"
 
-    "offline_twitter/scraper"
     "offline_twitter/persistence"
+    "offline_twitter/scraper"
 )
 
 /**
@@ -40,28 +40,27 @@ func create_or_load_profile(profile_path string) persistence.Profile {
     return profile
 }
 
-
 /**
  * Create a stable user with a fixed ID and handle
 */
 func create_stable_user() scraper.User {
     return scraper.User{
-        ID: scraper.UserID(-1),
-        DisplayName: "stable display name",
-        Handle: scraper.UserHandle("handle stable"),
-        Bio: "stable bio",
-        FollowersCount: 10,
-        FollowingCount: 2000,
-        Location: "stable location",
-        Website:"stable website",
-        JoinDate: time.Unix(10000000, 0),
-        IsVerified: true,
-        IsPrivate: false,
-        ProfileImageUrl: "stable profile image url",
+        ID:                    scraper.UserID(-1),
+        DisplayName:           "stable display name",
+        Handle:                scraper.UserHandle("handle stable"),
+        Bio:                   "stable bio",
+        FollowersCount:        10,
+        FollowingCount:        2000,
+        Location:              "stable location",
+        Website:               "stable website",
+        JoinDate:              scraper.TimestampFromUnix(10000000),
+        IsVerified:            true,
+        IsPrivate:             false,
+        ProfileImageUrl:       "stable profile image url",
         ProfileImageLocalPath: "stable profile image local path",
-        BannerImageUrl: "stable banner image url",
-        BannerImageLocalPath: "stable image local path",
-        PinnedTweetID: scraper.TweetID(345),
+        BannerImageUrl:        "stable banner image url",
+        BannerImageLocalPath:  "stable image local path",
+        PinnedTweetID:         scraper.TweetID(345),
     }
 }
 
@@ -107,58 +106,56 @@ func create_video_from_id(id int) scraper.Video {
  */
 func create_url_from_id(id int) scraper.Url {
     s := fmt.Sprint(id)
-    return scraper.Url {
-        TweetID: -1,
-        Domain: s + "domain",
-        Text: s + "text",
-        ShortText: s + "shorttext",
-        Title: s + "title",
-        Description: s + "description",
-        ThumbnailWidth: id * 23,
-        ThumbnailHeight: id * 7,
-        ThumbnailRemoteUrl: s + "remote url",
-        ThumbnailLocalPath: s + "local path",
-        CreatorID: scraper.UserID(id),
-        SiteID: scraper.UserID(id),
-        HasCard: true,
+    return scraper.Url{
+        TweetID:             -1,
+        Domain:              s + "domain",
+        Text:                s + "text",
+        ShortText:           s + "shorttext",
+        Title:               s + "title",
+        Description:         s + "description",
+        ThumbnailWidth:      id * 23,
+        ThumbnailHeight:     id * 7,
+        ThumbnailRemoteUrl:  s + "remote url",
+        ThumbnailLocalPath:  s + "local path",
+        CreatorID:           scraper.UserID(id),
+        SiteID:              scraper.UserID(id),
+        HasCard:             true,
         IsContentDownloaded: false,
     }
 }
 
-
 /**
  * Create a semi-stable Poll based on the given ID
 */
 func create_poll_from_id(id int) scraper.Poll {
     s := fmt.Sprint(id)
     return scraper.Poll{
-        ID: scraper.PollID(id),
-        TweetID: -1,
-        NumChoices: 2,
-        Choice1: s,
-        Choice1_Votes: 1000,
-        Choice2: "Not " + s,
-        Choice2_Votes: 1500,
+        ID:             scraper.PollID(id),
+        TweetID:        -1,
+        NumChoices:     2,
+        Choice1:        s,
+        Choice1_Votes:  1000,
+        Choice2:        "Not " + s,
+        Choice2_Votes:  1500,
         VotingDuration: 10,
-        VotingEndsAt: time.Unix(10000000, 0),
-        LastUpdatedAt: time.Unix(10000, 0),
+        VotingEndsAt:   scraper.TimestampFromUnix(10000000),
+        LastUpdatedAt:  scraper.TimestampFromUnix(10000),
     }
 }
 
-
 /**
  * Create a stable tweet with a fixed ID and content
 */
 func create_stable_tweet() scraper.Tweet {
     tweet_id := scraper.TweetID(-1)
     return scraper.Tweet{
-        ID: tweet_id,
-        UserID: -1,
-        Text: "stable text",
-        PostedAt: time.Unix(10000000, 0),
-        NumLikes: 10,
-        NumRetweets: 10,
-        NumReplies: 10,
+        ID:             tweet_id,
+        UserID:         -1,
+        Text:           "stable text",
+        PostedAt:       scraper.TimestampFromUnix(10000000),
+        NumLikes:       10,
+        NumRetweets:    10,
+        NumReplies:     10,
         NumQuoteTweets: 10,
         Videos: []scraper.Video{
             create_video_from_id(-1),
@@ -175,7 +172,7 @@ func create_stable_tweet() scraper.Tweet {
             create_poll_from_id(-1),
         },
         IsConversationScraped: true,
-        LastScrapedAt: time.Unix(100000000, 0),
+        LastScrapedAt:         scraper.TimestampFromUnix(100000000),
     }
 }
 
@@ -185,10 +182,10 @@ func create_stable_tweet() scraper.Tweet {
 func create_stable_retweet() scraper.Retweet {
     retweet_id := scraper.TweetID(-1)
     return scraper.Retweet{
-        RetweetID: retweet_id,
-        TweetID: -1,
+        RetweetID:     retweet_id,
+        TweetID:       -1,
         RetweetedByID: -1,
-        RetweetedAt: time.Unix(20000000, 0),
+        RetweetedAt:   scraper.TimestampFromUnix(20000000),
     }
 }
 
@@ -200,26 +197,25 @@ func create_dummy_user() scraper.User {
     userID := rand.Int()
 
     return scraper.User{
-        ID: scraper.UserID(userID),
-        DisplayName: "display name",
-        Handle: scraper.UserHandle(fmt.Sprintf("handle%d", userID)),
-        Bio: "bio",
-        FollowersCount: 0,
-        FollowingCount: 1000,
-        Location: "location",
-        Website:"website",
-        JoinDate: time.Now().Truncate(1e9), // Round to nearest second
-        IsVerified: false,
-        IsPrivate: true,
-        ProfileImageUrl: "profile image url",
+        ID:                    scraper.UserID(userID),
+        DisplayName:           "display name",
+        Handle:                scraper.UserHandle(fmt.Sprintf("handle%d", userID)),
+        Bio:                   "bio",
+        FollowersCount:        0,
+        FollowingCount:        1000,
+        Location:              "location",
+        Website:               "website",
+        JoinDate:              scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
+        IsVerified:            false,
+        IsPrivate:             true,
+        ProfileImageUrl:       "profile image url",
         ProfileImageLocalPath: "profile image local path",
-        BannerImageUrl: "banner image url",
-        BannerImageLocalPath: "banner image local path",
-        PinnedTweetID: scraper.TweetID(234),
+        BannerImageUrl:        "banner image url",
+        BannerImageLocalPath:  "banner image local path",
+        PinnedTweetID:         scraper.TweetID(234),
     }
 }
 
-
 /**
  * Create a new tweet with a random ID and content
 */
@@ -243,21 +239,21 @@ func create_dummy_tweet() scraper.Tweet {
     poll.TweetID = tweet_id
 
     return scraper.Tweet{
-        ID: tweet_id,
-        UserID: -1,
-        Text: "text",
-        PostedAt: time.Now().Truncate(1e9), // Round to nearest second
-        NumLikes: 1,
-        NumRetweets: 2,
-        NumReplies: 3,
+        ID:             tweet_id,
+        UserID:         -1,
+        Text:           "text",
+        PostedAt:       scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
+        NumLikes:       1,
+        NumRetweets:    2,
+        NumReplies:     3,
         NumQuoteTweets: 4,
-        Videos: []scraper.Video{vid},
-        Urls: []scraper.Url{url1, url2},
-        Images: []scraper.Image{img1, img2},
-        Mentions: []scraper.UserHandle{"mention1", "mention2"},
-        ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
-        Hashtags: []string{"hash1", "hash2"},
-        Polls: []scraper.Poll{poll},
+        Videos:         []scraper.Video{vid},
+        Urls:           []scraper.Url{url1, url2},
+        Images:         []scraper.Image{img1, img2},
+        Mentions:       []scraper.UserHandle{"mention1", "mention2"},
+        ReplyMentions:  []scraper.UserHandle{"replymention1", "replymention2"},
+        Hashtags:       []string{"hash1", "hash2"},
+        Polls:          []scraper.Poll{poll},
     }
 }
 
@@ -287,9 +283,9 @@ func create_dummy_retweet(tweet_id scraper.TweetID) scraper.Retweet {
     retweet_id := scraper.TweetID(rand.Int())
 
     return scraper.Retweet{
-        RetweetID: retweet_id,
-        TweetID: tweet_id,
+        RetweetID:     retweet_id,
+        TweetID:       tweet_id,
         RetweetedByID: -1,
-        RetweetedAt: time.Unix(20000000, 0),
+        RetweetedAt:   scraper.TimestampFromUnix(20000000),
     }
 }
diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go
index 30c8f5e..873b543 100644
--- a/scraper/api_types_v2.go
+++ b/scraper/api_types_v2.go
@@ -84,21 +84,18 @@ func (card APIV2Card) ParseAsPoll() Poll {
     }
     id := int_or_panic(card_url.Hostname())
 
-    voting_ends_at, err := time.Parse(time.RFC3339, values["end_datetime_utc"].StringValue)
-    if err != nil {
-        panic(err)
-    }
-    last_updated_at, err := time.Parse(time.RFC3339, values["last_updated_datetime_utc"].StringValue)
-    if err != nil {
-        panic(err)
-    }
-
     ret := Poll{}
     ret.ID = PollID(id)
     ret.NumChoices = parse_num_choices(card.Legacy.Name)
     ret.VotingDuration = int_or_panic(values["duration_minutes"].StringValue) * 60
-    ret.VotingEndsAt = voting_ends_at
-    ret.LastUpdatedAt = last_updated_at
+    ret.VotingEndsAt, err = TimestampFromString(values["end_datetime_utc"].StringValue)
+    if err != nil {
+        panic(err)
+    }
+    ret.LastUpdatedAt, err = TimestampFromString(values["last_updated_datetime_utc"].StringValue)
+    if err != nil {
+        panic(err)
+    }
 
     ret.Choice1 = values["choice1_label"].StringValue
     ret.Choice1_Votes = int_or_panic(values["choice1_count"].StringValue)
@@ -262,7 +259,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
         retweet.RetweetID = TweetID(api_v2_tweet.ID)
         retweet.TweetID = TweetID(api_v2_tweet.RetweetedStatusResult.Result.ID)
         retweet.RetweetedByID = UserID(api_v2_tweet.APITweet.UserID)
-        retweet.RetweetedAt, err = time.Parse(time.RubyDate, api_v2_tweet.APITweet.CreatedAt)
+        retweet.RetweetedAt, err = TimestampFromString(api_v2_tweet.APITweet.CreatedAt)
         if err != nil {
             fmt.Printf("%v\n", api_v2_tweet)
             panic(err)
diff --git a/scraper/poll.go b/scraper/poll.go
index dbc8b6a..0edae23 100644
--- a/scraper/poll.go
+++ b/scraper/poll.go
@@ -1,7 +1,6 @@
 package scraper
 
 import (
-    "time"
     "strings"
     "strconv"
     "net/url"
@@ -24,9 +23,9 @@ type Poll struct {
     Choice4_Votes int
 
     VotingDuration int // In seconds
-    VotingEndsAt   time.Time
+    VotingEndsAt   Timestamp
 
-    LastUpdatedAt time.Time
+    LastUpdatedAt Timestamp
 }
 
 func ParseAPIPoll(apiCard APICard) Poll {
@@ -36,21 +35,18 @@ func ParseAPIPoll(apiCard APICard) Poll {
     }
     id := int_or_panic(card_url.Hostname())
 
-    voting_ends_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.EndDatetimeUTC.StringValue)
-    if err != nil {
-        panic(err)
-    }
-    last_updated_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.LastUpdatedAt.StringValue)
-    if err != nil {
-        panic(err)
-    }
-
     ret := Poll{}
     ret.ID = PollID(id)
     ret.NumChoices = parse_num_choices(apiCard.Name)
     ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
-    ret.VotingEndsAt = voting_ends_at
-    ret.LastUpdatedAt = last_updated_at
+    ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
+    if err != nil {
+        panic(err)
+    }
+    ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
+    if err != nil {
+        panic(err)
+    }
 
     ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
     ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
diff --git a/scraper/retweet.go b/scraper/retweet.go
index 91e0a4d..77be9ae 100644
--- a/scraper/retweet.go
+++ b/scraper/retweet.go
@@ -1,16 +1,12 @@
 package scraper
 
-import (
-    "time"
-)
-
 type Retweet struct {
     RetweetID     TweetID
     TweetID       TweetID
     Tweet         *Tweet
     RetweetedByID UserID
     RetweetedBy   *User
-    RetweetedAt   time.Time
+    RetweetedAt   Timestamp
 }
 
 func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
@@ -19,6 +15,9 @@ func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
     ret.RetweetID = TweetID(apiTweet.ID)
     ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
     ret.RetweetedByID = UserID(apiTweet.UserID)
-    ret.RetweetedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+    ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
+    if err != nil {
+        panic(err)
+    }
     return
 }
diff --git a/scraper/timestamp_type.go b/scraper/timestamp_type.go
new file mode 100644
index 0000000..cdca2ff
--- /dev/null
+++ b/scraper/timestamp_type.go
@@ -0,0 +1,40 @@
+package scraper
+
+import (
+    "time"
+    "fmt"
+    "database/sql/driver"
+)
+
+type Timestamp struct {
+    time.Time
+}
+
+func (t Timestamp) Value() (driver.Value, error) {
+    return t.Unix(), nil
+}
+
+func (t *Timestamp) Scan(src interface{}) error {
+    val, is_ok := src.(int64)
+    if !is_ok {
+        return fmt.Errorf("Incompatible type for Timestamp: %#v", src)
+    }
+    *t = Timestamp{time.Unix(val, 0)}
+    return nil
+}
+
+func TimestampFromString(s string) (Timestamp, error) {
+    tmp, err := time.Parse(time.RubyDate, s)
+    if err == nil {
+        return Timestamp{tmp}, nil
+    }
+    tmp, err = time.Parse(time.RFC3339, s)
+    if err == nil {
+        return Timestamp{tmp}, nil
+    }
+    return Timestamp{}, err
+}
+
+func TimestampFromUnix(num int64) Timestamp {
+    return Timestamp{time.Unix(num, 0)}
+}
diff --git a/scraper/tweet.go b/scraper/tweet.go
index 9b594c8..9c210ec 100644
--- a/scraper/tweet.go
+++ b/scraper/tweet.go
@@ -18,7 +18,7 @@ type Tweet struct {
     UserHandle UserHandle // For processing tombstones
     User       *User
     Text       string
-    PostedAt   time.Time
+    PostedAt   Timestamp
     NumLikes   int
     NumRetweets int
     NumReplies int
@@ -39,7 +39,7 @@ type Tweet struct {
 
     IsContentDownloaded   bool
     IsConversationScraped bool
-    LastScrapedAt         time.Time
+    LastScrapedAt         Timestamp
 }
 
 
@@ -58,7 +58,7 @@ func (t Tweet) String() string {
 Replies: %d      RT: %d      QT: %d      Likes: %d
 `,
         author,
-        terminal_utils.FormatDate(t.PostedAt),
+        terminal_utils.FormatDate(t.PostedAt.Time),
         terminal_utils.WrapText(t.Text, 60),
         t.NumReplies,
         t.NumRetweets,
@@ -91,7 +91,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 
     // Process "posted-at" date and time
     if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
-        ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+        ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
         if err != nil {
             return
         }
@@ -175,7 +175,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
     // Process tombstones and other metadata
     ret.TombstoneType = apiTweet.TombstoneText
     ret.IsStub = !(ret.TombstoneType == "")
-    ret.LastScrapedAt = time.Unix(0, 0) // Caller will change this for the tweet that was actually scraped
+    ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
     ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
 
     return
@@ -251,7 +251,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
     if !ok {
         panic("Trove didn't contain its own tweet!")
     }
-    tweet.LastScrapedAt = time.Now()
+    tweet.LastScrapedAt = Timestamp{time.Now()}
     tweet.IsConversationScraped = true
     trove.Tweets[id] = tweet
 
diff --git a/scraper/user.go b/scraper/user.go
index ba8d0cb..1b7977e 100644
--- a/scraper/user.go
+++ b/scraper/user.go
@@ -1,7 +1,6 @@
 package scraper
 
 import (
-    "time"
     "fmt"
     "strings"
     "regexp"
@@ -32,7 +31,7 @@ type User struct {
     FollowersCount int
     Location       string
     Website        string
-    JoinDate       time.Time
+    JoinDate       Timestamp
     IsPrivate      bool
     IsVerified     bool
     IsBanned       bool
@@ -72,7 +71,7 @@ Joined %s
         terminal_utils.WrapText(u.Bio, 60),
         u.FollowingCount,
         u.FollowersCount,
-        terminal_utils.FormatDate(u.JoinDate),
+        terminal_utils.FormatDate(u.JoinDate.Time),
         u.Location,
         u.Website,
     )
@@ -100,7 +99,7 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
         FollowingCount: 0,
         Location: "",
         Website:"",
-        JoinDate: time.Unix(0, 0),
+        JoinDate: TimestampFromUnix(0),
         IsVerified: false,
         IsPrivate: false,
         IsNeedingFakeID: true,
@@ -133,7 +132,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
     if len(apiUser.Entities.URL.Urls) > 0 {
         ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
     }
-    ret.JoinDate, err = time.Parse(time.RubyDate, apiUser.CreatedAt)
+    ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
     if err != nil {
         return
     }
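
A closing note on TimestampFromString (illustrative commentary, not part of the
diff): it tries the legacy created_at layout (time.RubyDate) first and falls
back to RFC 3339, which the poll-card fields use, so tweets, users, retweets,
and cards can all share one parser. Hypothetical inputs showing the two paths:

    package main

    import (
        "fmt"

        "offline_twitter/scraper"
    )

    func main() {
        // Legacy API layout (time.RubyDate), like a tweet's or user's created_at
        t1, _ := scraper.TimestampFromString("Mon Jan 02 15:04:05 -0700 2006")
        fmt.Println(t1.Unix()) // 1136239445

        // RFC 3339, like a poll card's end_datetime_utc
        t2, _ := scraper.TimestampFromString("2006-01-02T15:04:05Z")
        fmt.Println(t2.Unix()) // 1136214245
    }

Worth knowing when debugging a bad timestamp: if both layouts fail, the error
returned is the RFC 3339 one; the RubyDate error is discarded.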