Create new Timestamp helper type
parent 7edc8ad5d3
commit 26ab9ec937
@@ -1,8 +1,6 @@
 package persistence
 
 import (
-	"time"
-
 	"offline_twitter/scraper"
 )
 

@@ -83,9 +81,9 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
 		last_scraped_at=?
 		`,
 		poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3,
-		poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(),
+		poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt, poll.LastUpdatedAt,
 
-		poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(),
+		poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt,
 	)
 	return err
 }

@@ -199,17 +197,13 @@ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err er
 		return
 	}
 	var poll scraper.Poll
-	var voting_ends_at int
-	var last_scraped_at int
 	for rows.Next() {
 		err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3,
-			&poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
+			&poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &poll.VotingEndsAt, &poll.LastUpdatedAt)
 		if err != nil {
 			return
 		}
 		poll.TweetID = t.ID
-		poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0)
-		poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0)
 		polls = append(polls, poll)
 	}
 	return
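
The intermediate int variables and the manual time.Unix conversions above can be dropped because the new Timestamp type (added below in scraper/timestamp_type.go) implements database/sql's driver.Valuer and sql.Scanner interfaces, so fields like poll.VotingEndsAt are handed directly to Exec and Scan. A minimal sketch of the round trip, using the repo's own offline_twitter/scraper import; the literal values are made up:

package main

import (
	"fmt"
	"time"

	"offline_twitter/scraper"
)

func main() {
	// Writing: database/sql calls Value(), which serializes the field as Unix seconds (int64).
	ts := scraper.Timestamp{time.Unix(1500000000, 0)}
	v, err := ts.Value()
	if err != nil {
		panic(err)
	}
	fmt.Printf("stored as %v\n", v) // stored as 1500000000

	// Reading: rows.Scan(&poll.VotingEndsAt) hands the raw int64 column value to Scan().
	var out scraper.Timestamp
	if err := out.Scan(v); err != nil {
		panic(err)
	}
	fmt.Println(out.Unix() == 1500000000) // true
}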

@@ -1,8 +1,6 @@
 package persistence
 
 import (
-	"time"
-
 	"offline_twitter/scraper"
 )
 

@@ -35,14 +33,11 @@ func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
 	defer stmt.Close()
 
 	var r scraper.Retweet
-	var retweeted_at int
 
 	row := stmt.QueryRow(id)
-	err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &retweeted_at)
+	err = row.Scan(&r.RetweetID, &r.TweetID, &r.RetweetedByID, &r.RetweetedAt)
 	if err != nil {
 		return scraper.Retweet{}, err
 	}
-
-	r.RetweetedAt = time.Unix(int64(retweeted_at), 0)
 	return r, nil
 }

@@ -3,7 +3,6 @@ package persistence
 import (
 	"database/sql"
 	"strings"
-	"time"
 
 	"offline_twitter/scraper"
 )

@@ -30,12 +29,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 		is_conversation_scraped=(is_conversation_scraped or ?),
 		last_scraped_at=max(last_scraped_at, ?)
 		`,
-		t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
+		t.ID, t.UserID, t.Text, t.PostedAt, t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID,
 		t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions),
-		strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(),
+		strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt,
 
 		t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped,
-		t.LastScrapedAt.Unix(),
+		t.LastScrapedAt,
 	)
 
 	if err != nil {

@@ -111,23 +110,18 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	defer stmt.Close()
 
 	var t scraper.Tweet
-	var postedAt int
-	var last_scraped_at int
 	var mentions string
 	var reply_mentions string
 	var hashtags string
 
 	row := stmt.QueryRow(id)
-	err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
+	err = row.Scan(&t.ID, &t.UserID, &t.Text, &t.PostedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID,
 		&t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded,
-		&t.IsConversationScraped, &last_scraped_at)
+		&t.IsConversationScraped, &t.LastScrapedAt)
 	if err != nil {
 		return t, err
 	}
 
-	t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds`
-	t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0)
-
 	t.Mentions = []scraper.UserHandle{}
 	for _, m := range strings.Split(mentions, ",") {
 		if m != "" {

@@ -2,12 +2,13 @@ package persistence_test
 import (
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
 	"github.com/go-test/deep"
 
 	"offline_twitter/scraper"
 )
 
 /**

@@ -74,7 +75,7 @@ func TestNoWorseningTweet(t *testing.T) {
 	tweet.IsContentDownloaded = true
 	tweet.IsStub = false
 	tweet.IsConversationScraped = true
-	tweet.LastScrapedAt = time.Unix(1000, 0)
+	tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
 
 	// Save the tweet
 	err := profile.SaveTweet(tweet)

@@ -84,7 +85,7 @@
 	tweet.IsContentDownloaded = false
 	tweet.IsStub = true
 	tweet.IsConversationScraped = false
-	tweet.LastScrapedAt = time.Unix(500, 0)
+	tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
 	err = profile.SaveTweet(tweet)
 	require.NoError(err)
 

@@ -113,7 +114,7 @@ func TestModifyTweet(t *testing.T) {
 	tweet.IsStub = true
 	tweet.IsContentDownloaded = false
 	tweet.IsConversationScraped = false
-	tweet.LastScrapedAt = time.Unix(1000, 0)
+	tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
 
 	err := profile.SaveTweet(tweet)
 	require.NoError(err)

@@ -125,7 +126,7 @@
 	tweet.IsStub = false
 	tweet.IsContentDownloaded = true
 	tweet.IsConversationScraped = true
-	tweet.LastScrapedAt = time.Unix(2000, 0)
+	tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
 
 	err = profile.SaveTweet(tweet)
 	require.NoError(err)

@@ -1,10 +1,9 @@
 package persistence
 
 import (
 	"fmt"
 	"database/sql"
-	"time"
 	"offline_twitter/scraper"
 )
 
 /**

@@ -51,21 +50,20 @@ func (p Profile) SaveUser(u *scraper.User) error {
 		pinned_tweet_id=?,
 		is_content_downloaded=(is_content_downloaded or ?)
 		`,
-		u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate.Unix(), u.IsPrivate,
+		u.ID, u.DisplayName, u.Handle, u.Bio, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.JoinDate, u.IsPrivate,
 		u.IsVerified, u.IsBanned, u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID,
 		u.IsContentDownloaded, u.IsIdFake,
 
 		u.Bio, u.DisplayName, u.FollowingCount, u.FollowersCount, u.Location, u.Website, u.IsPrivate, u.IsVerified, u.IsBanned,
 		u.ProfileImageUrl, u.ProfileImageLocalPath, u.BannerImageUrl, u.BannerImageLocalPath, u.PinnedTweetID, u.IsContentDownloaded,
 	)
 	if err != nil {
 		return err
 	}
-
 	return nil
 }
 
 /**
 * Check if the database has a User with the given user handle.
 *

@@ -189,89 +187,89 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
 * why the No Worsening Principle is needed.
 */
 func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
 	row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
 
 	var is_content_downloaded bool
 	var profile_image_url string
 	var banner_image_url string
 	err := row.Scan(&is_content_downloaded, &profile_image_url, &banner_image_url)
 	if err != nil {
 		if err == sql.ErrNoRows {
 			return true
 		} else {
 			panic(err)
 		}
 	}
 
 	if !is_content_downloaded {
 		return true
 	}
 	if banner_image_url != user.BannerImageUrl {
 		return true
 	}
 	if profile_image_url != user.ProfileImageUrl {
 		return true
 	}
 	return false
 }
 
 /**
 * Follow / unfollow a user. Update the given User object's IsFollowed field.
 */
 func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
 	result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
 	if err != nil {
 		panic(fmt.Sprintf("Error inserting user with handle %q: %s", user.Handle, err.Error()))
 	}
 	count, err := result.RowsAffected()
 	if err != nil {
 		panic("Unknown error: " + err.Error())
 	}
 	if count != 1 {
 		panic(fmt.Sprintf("User with handle %q not found", user.Handle))
 	}
 	user.IsFollowed = is_followed
 }
 
 func (p Profile) NextFakeUserID() scraper.UserID {
 	_, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
 	if err != nil {
 		panic(err)
 	}
 	var ret scraper.UserID
 	err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
 	if err != nil {
 		panic(err)
 	}
 	return ret
 }
 
 func (p Profile) GetAllFollowedUsers() []scraper.UserHandle {
 	rows, err := p.DB.Query("select handle from users where is_followed = 1")
 	if err != nil {
 		panic(err)
 	}
 
 	ret := []scraper.UserHandle{}
 
 	var tmp scraper.UserHandle
 
 	for rows.Next() {
 		err = rows.Scan(&tmp)
 		if err != nil {
 			panic(err)
 		}
 		ret = append(ret, tmp)
 	}
 
 	return ret
 }
 
 func (p Profile) IsFollowing(handle scraper.UserHandle) bool {
 	for _, follow := range p.GetAllFollowedUsers() {
 		if follow == handle {
-			return true;
+			return true
 		}
 	}
-	return false;
+	return false
 }

@@ -2,19 +2,17 @@ package persistence_test
 import (
 	"testing"
-	"time"
 
 	"fmt"
 	"math/rand"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	"github.com/go-test/deep"
 
 	"offline_twitter/scraper"
 )
 
-
 /**
 * Create a user, save it, reload it, and make sure it comes back the same
 */

@@ -62,7 +60,7 @@ func TestModifyUser(t *testing.T) {
 	fake_user.IsVerified = false
 	fake_user.IsBanned = false
 	fake_user.FollowersCount = 1000
-	fake_user.JoinDate = time.Unix(1000, 0)
+	fake_user.JoinDate = scraper.TimestampFromUnix(1000)
 	fake_user.ProfileImageUrl = "asdf"
 	fake_user.IsContentDownloaded = true
 

@@ -70,14 +68,13 @@
 	err := profile.SaveUser(&fake_user)
 	require.NoError(err)
 
-
 	fake_user.DisplayName = "Display Name 2"
 	fake_user.Location = "location2"
 	fake_user.IsPrivate = true
 	fake_user.IsVerified = true
 	fake_user.IsBanned = true
 	fake_user.FollowersCount = 2000
-	fake_user.JoinDate = time.Unix(2000, 0)
+	fake_user.JoinDate = scraper.TimestampFromUnix(2000)
 	fake_user.ProfileImageUrl = "asdf2"
 	fake_user.IsContentDownloaded = false // test No Worsening
 

@@ -114,7 +111,6 @@ func TestHandleIsCaseInsensitive(t *testing.T) {
 	}
 }
 
-
 /**
 * Should correctly report whether the user exists in the database
 */

@@ -12,6 +12,7 @@ type ErrNotInDatabase struct {
 	Table string
 	Value interface{}
 }
+
 func (err ErrNotInDatabase) Error() string {
 	return fmt.Sprintf("Not in database: %s %q", err.Table, err.Value)
 }

@@ -1,12 +1,12 @@
 package persistence_test
 
 import (
-	"time"
 	"fmt"
 	"math/rand"
+	"time"
 
-	"offline_twitter/scraper"
 	"offline_twitter/persistence"
+	"offline_twitter/scraper"
 )
 
 /**

@@ -40,28 +40,27 @@ func create_or_load_profile(profile_path string) persistence.Profile {
 	return profile
 }
 
-
 /**
 * Create a stable user with a fixed ID and handle
 */
 func create_stable_user() scraper.User {
 	return scraper.User{
 		ID: scraper.UserID(-1),
 		DisplayName: "stable display name",
 		Handle: scraper.UserHandle("handle stable"),
 		Bio: "stable bio",
 		FollowersCount: 10,
 		FollowingCount: 2000,
 		Location: "stable location",
-		Website:"stable website",
-		JoinDate: time.Unix(10000000, 0),
+		Website: "stable website",
+		JoinDate: scraper.TimestampFromUnix(10000000),
 		IsVerified: true,
 		IsPrivate: false,
 		ProfileImageUrl: "stable profile image url",
 		ProfileImageLocalPath: "stable profile image local path",
 		BannerImageUrl: "stable banner image url",
 		BannerImageLocalPath: "stable image local path",
 		PinnedTweetID: scraper.TweetID(345),
 	}
 }
 

@@ -107,58 +106,56 @@ func create_video_from_id(id int) scraper.Video {
 */
 func create_url_from_id(id int) scraper.Url {
 	s := fmt.Sprint(id)
-	return scraper.Url {
+	return scraper.Url{
 		TweetID: -1,
 		Domain: s + "domain",
 		Text: s + "text",
 		ShortText: s + "shorttext",
 		Title: s + "title",
 		Description: s + "description",
 		ThumbnailWidth: id * 23,
 		ThumbnailHeight: id * 7,
 		ThumbnailRemoteUrl: s + "remote url",
 		ThumbnailLocalPath: s + "local path",
 		CreatorID: scraper.UserID(id),
 		SiteID: scraper.UserID(id),
 		HasCard: true,
 		IsContentDownloaded: false,
 	}
 }
 
-
 /**
 * Create a semi-stable Poll based on the given ID
 */
 func create_poll_from_id(id int) scraper.Poll {
 	s := fmt.Sprint(id)
 	return scraper.Poll{
 		ID: scraper.PollID(id),
 		TweetID: -1,
 		NumChoices: 2,
 		Choice1: s,
 		Choice1_Votes: 1000,
 		Choice2: "Not " + s,
 		Choice2_Votes: 1500,
 		VotingDuration: 10,
-		VotingEndsAt: time.Unix(10000000, 0),
-		LastUpdatedAt: time.Unix(10000, 0),
+		VotingEndsAt: scraper.TimestampFromUnix(10000000),
+		LastUpdatedAt: scraper.TimestampFromUnix(10000),
 	}
 }
 
-
 /**
 * Create a stable tweet with a fixed ID and content
 */
 func create_stable_tweet() scraper.Tweet {
 	tweet_id := scraper.TweetID(-1)
 	return scraper.Tweet{
 		ID: tweet_id,
 		UserID: -1,
 		Text: "stable text",
-		PostedAt: time.Unix(10000000, 0),
+		PostedAt: scraper.TimestampFromUnix(10000000),
 		NumLikes: 10,
 		NumRetweets: 10,
 		NumReplies: 10,
 		NumQuoteTweets: 10,
 		Videos: []scraper.Video{
 			create_video_from_id(-1),

@@ -175,7 +172,7 @@ func create_stable_tweet() scraper.Tweet {
 			create_poll_from_id(-1),
 		},
 		IsConversationScraped: true,
-		LastScrapedAt: time.Unix(100000000, 0),
+		LastScrapedAt: scraper.TimestampFromUnix(100000000),
 	}
 }
 

@@ -185,10 +182,10 @@ func create_stable_tweet() scraper.Tweet {
 func create_stable_retweet() scraper.Retweet {
 	retweet_id := scraper.TweetID(-1)
 	return scraper.Retweet{
 		RetweetID: retweet_id,
 		TweetID: -1,
 		RetweetedByID: -1,
-		RetweetedAt: time.Unix(20000000, 0),
+		RetweetedAt: scraper.TimestampFromUnix(20000000),
 	}
 }
 

@@ -200,26 +197,25 @@ func create_dummy_user() scraper.User {
 	userID := rand.Int()
 
 	return scraper.User{
 		ID: scraper.UserID(userID),
 		DisplayName: "display name",
 		Handle: scraper.UserHandle(fmt.Sprintf("handle%d", userID)),
 		Bio: "bio",
 		FollowersCount: 0,
 		FollowingCount: 1000,
 		Location: "location",
-		Website:"website",
-		JoinDate: time.Now().Truncate(1e9), // Round to nearest second
+		Website: "website",
+		JoinDate: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
 		IsVerified: false,
 		IsPrivate: true,
 		ProfileImageUrl: "profile image url",
 		ProfileImageLocalPath: "profile image local path",
 		BannerImageUrl: "banner image url",
 		BannerImageLocalPath: "banner image local path",
 		PinnedTweetID: scraper.TweetID(234),
 	}
 }
 
-
 /**
 * Create a new tweet with a random ID and content
 */

@@ -243,21 +239,21 @@ func create_dummy_tweet() scraper.Tweet {
 	poll.TweetID = tweet_id
 
 	return scraper.Tweet{
 		ID: tweet_id,
 		UserID: -1,
 		Text: "text",
-		PostedAt: time.Now().Truncate(1e9), // Round to nearest second
+		PostedAt: scraper.Timestamp{time.Now().Truncate(1e9)}, // Round to nearest second
 		NumLikes: 1,
 		NumRetweets: 2,
 		NumReplies: 3,
 		NumQuoteTweets: 4,
 		Videos: []scraper.Video{vid},
 		Urls: []scraper.Url{url1, url2},
 		Images: []scraper.Image{img1, img2},
 		Mentions: []scraper.UserHandle{"mention1", "mention2"},
 		ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
 		Hashtags: []string{"hash1", "hash2"},
 		Polls: []scraper.Poll{poll},
 	}
 }
 

@@ -287,9 +283,9 @@ func create_dummy_retweet(tweet_id scraper.TweetID) scraper.Retweet {
 	retweet_id := scraper.TweetID(rand.Int())
 
 	return scraper.Retweet{
 		RetweetID: retweet_id,
 		TweetID: tweet_id,
 		RetweetedByID: -1,
-		RetweetedAt: time.Unix(20000000, 0),
+		RetweetedAt: scraper.TimestampFromUnix(20000000),
 	}
 }

@@ -84,21 +84,18 @@ func (card APIV2Card) ParseAsPoll() Poll {
 	}
 	id := int_or_panic(card_url.Hostname())
 
-	voting_ends_at, err := time.Parse(time.RFC3339, values["end_datetime_utc"].StringValue)
-	if err != nil {
-		panic(err)
-	}
-	last_updated_at, err := time.Parse(time.RFC3339, values["last_updated_datetime_utc"].StringValue)
-	if err != nil {
-		panic(err)
-	}
-
 	ret := Poll{}
 	ret.ID = PollID(id)
 	ret.NumChoices = parse_num_choices(card.Legacy.Name)
 	ret.VotingDuration = int_or_panic(values["duration_minutes"].StringValue) * 60
-	ret.VotingEndsAt = voting_ends_at
-	ret.LastUpdatedAt = last_updated_at
+	ret.VotingEndsAt, err = TimestampFromString(values["end_datetime_utc"].StringValue)
+	if err != nil {
+		panic(err)
+	}
+	ret.LastUpdatedAt, err = TimestampFromString(values["last_updated_datetime_utc"].StringValue)
+	if err != nil {
+		panic(err)
+	}
 
 	ret.Choice1 = values["choice1_label"].StringValue
 	ret.Choice1_Votes = int_or_panic(values["choice1_count"].StringValue)

@@ -262,7 +259,7 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
 		retweet.RetweetID = TweetID(api_v2_tweet.ID)
 		retweet.TweetID = TweetID(api_v2_tweet.RetweetedStatusResult.Result.ID)
 		retweet.RetweetedByID = UserID(api_v2_tweet.APITweet.UserID)
-		retweet.RetweetedAt, err = time.Parse(time.RubyDate, api_v2_tweet.APITweet.CreatedAt)
+		retweet.RetweetedAt, err = TimestampFromString(api_v2_tweet.APITweet.CreatedAt)
 		if err != nil {
 			fmt.Printf("%v\n", api_v2_tweet)
 			panic(err)

@@ -1,7 +1,6 @@
 package scraper
 
 import (
-	"time"
 	"strings"
 	"strconv"
 	"net/url"

@@ -24,9 +23,9 @@ type Poll struct {
 	Choice4_Votes int
 
 	VotingDuration int // In seconds
-	VotingEndsAt time.Time
+	VotingEndsAt Timestamp
 
-	LastUpdatedAt time.Time
+	LastUpdatedAt Timestamp
 }
 
 func ParseAPIPoll(apiCard APICard) Poll {

@@ -36,21 +35,18 @@ func ParseAPIPoll(apiCard APICard) Poll {
 	}
 	id := int_or_panic(card_url.Hostname())
 
-	voting_ends_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.EndDatetimeUTC.StringValue)
-	if err != nil {
-		panic(err)
-	}
-	last_updated_at, err := time.Parse(time.RFC3339, apiCard.BindingValues.LastUpdatedAt.StringValue)
-	if err != nil {
-		panic(err)
-	}
-
 	ret := Poll{}
 	ret.ID = PollID(id)
 	ret.NumChoices = parse_num_choices(apiCard.Name)
 	ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
-	ret.VotingEndsAt = voting_ends_at
-	ret.LastUpdatedAt = last_updated_at
+	ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
+	if err != nil {
+		panic(err)
+	}
+	ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
+	if err != nil {
+		panic(err)
+	}
 
 	ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
 	ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)

@@ -1,16 +1,12 @@
 package scraper
 
-import (
-	"time"
-)
-
 type Retweet struct {
 	RetweetID TweetID
 	TweetID TweetID
 	Tweet *Tweet
 	RetweetedByID UserID
 	RetweetedBy *User
-	RetweetedAt time.Time
+	RetweetedAt Timestamp
 }
 
 func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {

@@ -19,6 +15,9 @@ func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
 	ret.RetweetID = TweetID(apiTweet.ID)
 	ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
 	ret.RetweetedByID = UserID(apiTweet.UserID)
-	ret.RetweetedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+	ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
+	if err != nil {
+		panic(err)
+	}
 	return
 }

scraper/timestamp_type.go (new file, 40 lines)
@@ -0,0 +1,40 @@
+package scraper
+
+import (
+	"time"
+	"fmt"
+	"database/sql/driver"
+)
+
+type Timestamp struct {
+	time.Time
+}
+
+func (t Timestamp) Value() (driver.Value, error) {
+	return t.Unix(), nil
+}
+
+func (t *Timestamp) Scan(src interface{}) error {
+	val, is_ok := src.(int64)
+	if !is_ok {
+		return fmt.Errorf("Incompatible type for Timestamp: %#v", src)
+	}
+	*t = Timestamp{time.Unix(val, 0)}
+	return nil
+}
+
+func TimestampFromString(s string) (Timestamp, error) {
+	tmp, err := time.Parse(time.RubyDate, s)
+	if err == nil {
+		return Timestamp{tmp}, nil
+	}
+	tmp, err = time.Parse(time.RFC3339, s)
+	if err == nil {
+		return Timestamp{tmp}, nil
+	}
+	return Timestamp{}, err
+}
+
+func TimestampFromUnix(num int64) Timestamp {
+	return Timestamp{time.Unix(num, 0)}
+}
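
TimestampFromString tries the two date formats Twitter serves: the legacy API's Ruby-style created_at (time.RubyDate) and the RFC 3339 strings found in poll card values. A small usage sketch; both input strings are hypothetical examples of those formats, not values from this commit:

package main

import (
	"fmt"

	"offline_twitter/scraper"
)

func main() {
	// Legacy-API style, e.g. a tweet's created_at field (time.RubyDate layout).
	t1, err := scraper.TimestampFromString("Wed Oct 10 20:19:24 +0000 2018")
	if err != nil {
		panic(err)
	}

	// Card-value style, e.g. end_datetime_utc on a poll (time.RFC3339 layout).
	t2, err := scraper.TimestampFromString("2018-10-10T20:19:24Z")
	if err != nil {
		panic(err)
	}

	fmt.Println(t1.Equal(t2.Time)) // true: both strings denote the same instant
}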

@@ -18,7 +18,7 @@ type Tweet struct {
 	UserHandle UserHandle // For processing tombstones
 	User *User
 	Text string
-	PostedAt time.Time
+	PostedAt Timestamp
 	NumLikes int
 	NumRetweets int
 	NumReplies int

@@ -39,7 +39,7 @@ type Tweet struct {
 
 	IsContentDownloaded bool
 	IsConversationScraped bool
-	LastScrapedAt time.Time
+	LastScrapedAt Timestamp
 }
 
 

@@ -58,7 +58,7 @@ func (t Tweet) String() string {
 Replies: %d RT: %d QT: %d Likes: %d
 `,
 		author,
-		terminal_utils.FormatDate(t.PostedAt),
+		terminal_utils.FormatDate(t.PostedAt.Time),
 		terminal_utils.WrapText(t.Text, 60),
 		t.NumReplies,
 		t.NumRetweets,

@@ -91,7 +91,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 
 	// Process "posted-at" date and time
 	if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
-		ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
+		ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
 		if err != nil {
 			return
 		}

@@ -175,7 +175,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 	// Process tombstones and other metadata
 	ret.TombstoneType = apiTweet.TombstoneText
 	ret.IsStub = !(ret.TombstoneType == "")
-	ret.LastScrapedAt = time.Unix(0, 0) // Caller will change this for the tweet that was actually scraped
+	ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
 	ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
 
 	return

@@ -251,7 +251,7 @@ func GetTweetFull(id TweetID) (trove TweetTrove, err error) {
 	if !ok {
 		panic("Trove didn't contain its own tweet!")
 	}
-	tweet.LastScrapedAt = time.Now()
+	tweet.LastScrapedAt = Timestamp{time.Now()}
 	tweet.IsConversationScraped = true
 	trove.Tweets[id] = tweet
 

@@ -1,7 +1,6 @@
 package scraper
 
 import (
-	"time"
 	"fmt"
 	"strings"
 	"regexp"

@@ -32,7 +31,7 @@ type User struct {
 	FollowersCount int
 	Location string
 	Website string
-	JoinDate time.Time
+	JoinDate Timestamp
 	IsPrivate bool
 	IsVerified bool
 	IsBanned bool

@@ -72,7 +71,7 @@ Joined %s
 		terminal_utils.WrapText(u.Bio, 60),
 		u.FollowingCount,
 		u.FollowersCount,
-		terminal_utils.FormatDate(u.JoinDate),
+		terminal_utils.FormatDate(u.JoinDate.Time),
 		u.Location,
 		u.Website,
 	)

@@ -100,7 +99,7 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
 		FollowingCount: 0,
 		Location: "<blank>",
 		Website:"<blank>",
-		JoinDate: time.Unix(0, 0),
+		JoinDate: TimestampFromUnix(0),
 		IsVerified: false,
 		IsPrivate: false,
 		IsNeedingFakeID: true,

@@ -133,7 +132,7 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
 	if len(apiUser.Entities.URL.Urls) > 0 {
 		ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
 	}
-	ret.JoinDate, err = time.Parse(time.RubyDate, apiUser.CreatedAt)
+	ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
 	if err != nil {
 		return
 	}
 
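
The .Time accessors sprinkled through these hunks (t.PostedAt.Time, u.JoinDate.Time) follow from Timestamp embedding time.Time: the embedded type's methods are promoted onto Timestamp, but a parameter declared as time.Time still requires the explicit field. A self-contained illustration of that distinction; the takesTime helper is hypothetical, standing in for a function like terminal_utils.FormatDate:

package main

import (
	"fmt"
	"time"
)

// Timestamp mirrors the type added in scraper/timestamp_type.go: a struct embedding time.Time.
type Timestamp struct {
	time.Time
}

// takesTime stands in for any function that accepts a plain time.Time.
func takesTime(t time.Time) string {
	return t.Format(time.RFC822)
}

func main() {
	ts := Timestamp{time.Unix(0, 0)}
	fmt.Println(ts.Unix())          // method promotion: Unix() comes from the embedded time.Time
	fmt.Println(takesTime(ts.Time)) // a time.Time argument needs the explicit .Time field
	// takesTime(ts) would not compile: Timestamp is not assignable to time.Time
}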