Add persistence for polls

This commit is contained in:
Alessio 2021-12-12 16:42:32 -08:00
parent ab89d53cc6
commit c15ac9d751
8 changed files with 248 additions and 2 deletions

View File

@ -1,6 +1,8 @@
package persistence
import (
"time"
"offline_twitter/scraper"
)
@ -58,6 +60,27 @@ func (p Profile) SaveUrl(url scraper.Url) error {
return err
}
/**
 * Persist a Poll in the `polls` table.
 *
 * A new row is inserted with every field of the poll.  If the insert hits a conflict,
 * only the four vote counts and `last_scraped_at` are overwritten on the existing row.
 * Both timestamps are stored as Unix seconds.
 *
 * NOTE(review): the `polls` table shown in this change declares no unique constraint
 * other than `rowid`, which this insert does not supply, so the conflict branch may
 * never actually fire -- confirm whether a unique key on the poll is intended.
 *
 * args:
 * - poll: the Poll to save
 *
 * returns: the error from executing the statement, if any
 */
func (p Profile) SavePoll(poll scraper.Poll) error {
	const upsert_query = `
insert into polls (tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict do update
set choice1_votes=?,
choice2_votes=?,
choice3_votes=?,
choice4_votes=?,
last_scraped_at=?
`
	voting_ends_at := poll.VotingEndsAt.Unix()
	last_scraped_at := poll.LastUpdatedAt.Unix()

	// Placeholders for the `insert ... values (...)` part, in column order
	insert_args := []interface{}{
		poll.TweetID,
		poll.NumChoices,
		poll.Choice1, poll.Choice1_Votes,
		poll.Choice2, poll.Choice2_Votes,
		poll.Choice3, poll.Choice3_Votes,
		poll.Choice4, poll.Choice4_Votes,
		poll.VotingDuration,
		voting_ends_at,
		last_scraped_at,
	}
	// Placeholders for the `do update set ...` part
	update_args := []interface{}{
		poll.Choice1_Votes,
		poll.Choice2_Votes,
		poll.Choice3_Votes,
		poll.Choice4_Votes,
		last_scraped_at,
	}

	_, err := p.DB.Exec(upsert_query, append(insert_args, update_args...)...)
	return err
}
/**
* Get the list of images for a tweet
*/
@ -134,3 +157,32 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
}
return
}
/**
 * Get the list of Polls for a Tweet
 *
 * Selects every row of `polls` whose `tweet_id` equals `t.ID` and converts the stored
 * Unix-second timestamps back into `time.Time` values.
 *
 * args:
 * - t: the tweet whose polls should be loaded; only `t.ID` is read
 *
 * returns:
 * - polls: the polls scanned so far (nil if the query could not be prepared or run)
 * - err: the first error from preparing, querying, scanning or iterating, if any
 */
func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
	stmt, err := p.DB.Prepare("select num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at from polls where tweet_id=?")
	if err != nil {
		return nil, err
	}
	defer stmt.Close()

	rows, err := stmt.Query(t.ID)
	if err != nil {
		return nil, err
	}
	// Release the result set on every exit path, including an early return on Scan failure
	defer rows.Close()

	for rows.Next() {
		// Fresh values per row, so nothing can carry over from a previous iteration
		var poll scraper.Poll
		var voting_ends_at int64
		var last_scraped_at int64

		err = rows.Scan(&poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3, &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
		if err != nil {
			return polls, err
		}
		poll.TweetID = t.ID
		poll.VotingEndsAt = time.Unix(voting_ends_at, 0)
		poll.LastUpdatedAt = time.Unix(last_scraped_at, 0)
		polls = append(polls, poll)
	}

	// rows.Next() returns false both when the rows are exhausted and when iteration
	// failed; rows.Err() distinguishes the two.
	err = rows.Err()
	return polls, err
}

View File

@ -244,3 +244,42 @@ func TestModifyUrl(t *testing.T) {
t.Error(diff)
}
}
/**
 * Round-trip a Poll through the database: build one with a random first choice, attach
 * it to the stable tweet, save it, load the tweet's polls back, and check that the
 * matching poll is deep-equal to what was saved.
 */
func TestSaveAndLoadPoll(t *testing.T) {
	profile := create_or_load_profile("test_profiles/TestMediaQueries")
	tweet := create_stable_tweet()

	poll := create_poll_from_id(rand.Int())
	poll.TweetID = tweet.ID

	// Save the Poll
	if save_err := profile.SavePoll(poll); save_err != nil {
		t.Fatalf("Failed to save the poll: %s", save_err.Error())
	}

	// Reload the Poll
	polls, load_err := profile.GetPollsForTweet(tweet)
	if load_err != nil {
		t.Fatalf("Could not load poll: %s", load_err.Error())
	}

	// The tweet may carry other polls; pick out ours by its first choice
	var new_poll scraper.Poll
	for _, candidate := range polls {
		if candidate.Choice1 != poll.Choice1 {
			continue
		}
		new_poll = candidate
	}

	if new_poll.Choice1 != poll.Choice1 {
		t.Fatalf("Could not find poll for some reason: %s, %s; %+v", new_poll.Choice1, poll.Choice1, polls)
	}

	diff := deep.Equal(poll, new_poll)
	if diff != nil {
		t.Error(diff)
	}
}

View File

@ -81,6 +81,27 @@ create table urls (rowid integer primary key,
foreign key(tweet_id) references tweets(id)
);
-- Polls attached to tweets.  Each row holds up to four choices with their vote counts,
-- plus the voting window and the time the poll was last scraped.
-- NOTE(review): no unique constraint other than rowid is declared on this table, so an
-- `on conflict` clause on inserts into it may never trigger -- confirm this is intended.
create table polls (rowid integer primary key,
tweet_id integer not null,
num_choices integer not null,
-- Choice labels and vote counts are nullable
choice1 text,
choice1_votes integer,
choice2 text,
choice2_votes integer,
choice3 text,
choice3_votes integer,
choice4 text,
choice4_votes integer,
voting_duration integer not null, -- in seconds
-- The two columns below are written by SavePoll as Unix timestamps (seconds)
voting_ends_at integer not null,
last_scraped_at integer not null,
foreign key(tweet_id) references tweets(id)
);
create table images (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,

View File

@ -57,6 +57,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
return err
}
}
for _, poll := range t.Polls {
err := p.SavePoll(poll)
if err != nil {
return err
}
}
err = tx.Commit()
if err != nil {
@ -138,6 +144,12 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
}
t.Videos = vids
polls, err := p.GetPollsForTweet(t)
if err != nil {
return t, err
}
t.Polls = polls
urls, err := p.GetUrlsForTweet(t)
t.Urls = urls

View File

@ -119,6 +119,26 @@ func create_url_from_id(id int) scraper.Url {
}
}
/**
 * Build a Poll test fixture from the given ID.
 *
 * The two choice labels are derived from `id` ("<id>" and "Not <id>"); every other
 * field is fixed, so two calls with the same ID produce equal polls.  Choices 3 and 4
 * are left at their zero values.
 */
func create_poll_from_id(id int) scraper.Poll {
	label := fmt.Sprint(id)

	var poll scraper.Poll
	poll.TweetID = -1
	poll.NumChoices = 2

	poll.Choice1 = label
	poll.Choice1_Votes = 1000
	poll.Choice2 = "Not " + label
	poll.Choice2_Votes = 1500

	poll.VotingDuration = 10
	poll.VotingEndsAt = time.Unix(10000000, 0)
	poll.LastUpdatedAt = time.Unix(10000, 0)
	return poll
}
/**
* Create a stable tweet with a fixed ID and content
*/
@ -144,6 +164,9 @@ func create_stable_tweet() scraper.Tweet {
},
Mentions: []scraper.UserHandle{},
Hashtags: []string{},
Polls: []scraper.Poll{
create_poll_from_id(-1),
},
}
}
@ -207,6 +230,9 @@ func create_dummy_tweet() scraper.Tweet {
url2 := create_url_from_id(rand.Int())
url2.TweetID = tweet_id
poll := create_poll_from_id(rand.Int())
poll.TweetID = tweet_id
return scraper.Tweet{
ID: tweet_id,
UserID: -1,
@ -222,6 +248,7 @@ func create_dummy_tweet() scraper.Tweet {
Mentions: []scraper.UserHandle{"mention1", "mention2"},
ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
Hashtags: []string{"hash1", "hash2"},
Polls: []scraper.Poll{poll},
}
}

View File

@ -0,0 +1 @@
{"created_at":"Tue Nov 30 04:12:14 +0000 2021","id_str":"1465534109573390348","full_text":"Which of these tattoos would you get if you had to get one on your arm?","display_text_range":[0,71],"entities":{},"source":"<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">Twitter for Android</a>","user_id_str":"44067298","retweet_count":13,"favorite_count":118,"reply_count":1,"quote_count":18,"conversation_id_str":"1465534109573390348","conversation_control":{"policy":"community","conversation_owner":{"screen_name":"michaelmalice"}},"possibly_sensitive_editable":true,"card":{"name":"poll4choice_text_only","url":"card://1465534108923314180","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Tribal armband"},"choice2_label":{"type":"STRING","string_value":"Marijuana leaf"},"end_datetime_utc":{"type":"STRING","string_value":"2021-12-01T04:12:14Z"},"counts_are_final":{"type":"BOOLEAN","boolean_value":true},"choice2_count":{"type":"STRING","string_value":"624"},"choice1_count":{"type":"STRING","string_value":"1593"},"choice4_label":{"type":"STRING","string_value":"Maple leaf"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-12-01T04:12:15Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"choice3_count":{"type":"STRING","string_value":"778"},"choice4_count":{"type":"STRING","string_value":"1138"},"choice3_label":{"type":"STRING","string_value":"Butterfly"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}},"lang":"en","self_thread":{"id_str":"1465534109573390348"}}

View File

@ -25,12 +25,13 @@ type Tweet struct {
InReplyToID TweetID
QuotedTweetID TweetID
Urls []Url
Images []Image
Videos []Video
Mentions []UserHandle
ReplyMentions []UserHandle
Hashtags []string
Urls []Url
Polls []Poll
TombstoneType string
IsStub bool
@ -84,6 +85,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.UserID = UserID(apiTweet.UserID)
ret.Text = apiTweet.FullText
// Process "posted-at" date and time
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
if err != nil {
@ -97,6 +99,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.NumQuoteTweets = apiTweet.QuoteCount
ret.InReplyToID = TweetID(apiTweet.InReplyToStatusID)
// Process URLs and link previews
for _, url := range apiTweet.Entities.URLs {
var url_object Url
if apiTweet.Card.ShortenedUrl == url.ShortenedUrl {
@ -107,6 +110,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.Urls = append(ret.Urls, url_object)
}
// Process images
for _, media := range apiTweet.Entities.Media {
if media.Type != "photo" { // TODO: remove this eventually
panic_str := fmt.Sprintf("Unknown media type: %q", media.Type)
@ -116,13 +120,16 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
new_image.TweetID = ret.ID
ret.Images = append(ret.Images, new_image)
}
// Process hashtags
for _, hashtag := range apiTweet.Entities.Hashtags {
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
}
// Process `@` mentions and reply-mentions
for _, mention := range apiTweet.Entities.Mentions {
ret.Mentions = append(ret.Mentions, UserHandle(mention.UserName))
}
for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
if mention != "" {
if mention[0] != '@' {
@ -134,6 +141,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.QuotedTweetID = TweetID(apiTweet.QuotedStatusID)
// Process videos
for _, entity := range apiTweet.ExtendedEntities.Media {
if entity.Type != "video" && entity.Type != "animated_gif" {
continue
@ -146,6 +154,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
ret.Images = []Image{}
}
// Process polls
if strings.Index(apiTweet.Card.Name, "poll") == 0 {
poll := ParseAPIPoll(apiTweet.Card)
poll.TweetID = ret.ID
ret.Polls = []Poll{poll}
}
// Process tombstones
ret.TombstoneType = apiTweet.TombstoneText
ret.IsStub = !(ret.TombstoneType == "")

View File

@ -51,6 +51,10 @@ func TestParseSingleTweet(t *testing.T) {
if tweet.QuotedTweetID != 0 {
t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
}
func TestParseTweetWithImage(t *testing.T) {
@ -85,6 +89,10 @@ func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
if tweet.QuotedTweetID != expected_quoted_id {
t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
}
func TestParseTweetWithVideo(t *testing.T) {
@ -136,6 +144,10 @@ func TestParseTweetWithUrl(t *testing.T) {
if tweet.Urls[0].Domain != expected_url_domain {
t.Errorf("Expected Url text to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
}
func TestParseTweetWithUrlButNoCard(t *testing.T) {
@ -173,6 +185,10 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
if tweet.Urls[2].Title != expected_title {
t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title)
}
if len(tweet.Polls) != 0 {
t.Errorf("Should not have any polls")
}
}
func TestTweetWithLotsOfReplyMentions(t *testing.T) {
@ -188,6 +204,68 @@ func TestTweetWithLotsOfReplyMentions(t *testing.T) {
}
}
/**
 * Parse a tweet fixture containing a 4-choice poll and check every parsed poll field:
 * owning tweet ID, number of choices, choice labels, vote counts, voting duration,
 * and the end / last-updated timestamps.
 */
func TestTweetWithPoll(t *testing.T) {
	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")

	if num_polls := len(tweet.Polls); num_polls != 1 {
		t.Fatalf("Expected there to be 1 poll, but there was %d", num_polls)
	}
	p := tweet.Polls[0]

	if p.TweetID != tweet.ID {
		t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", p.TweetID, tweet.ID)
	}
	if p.NumChoices != 4 {
		t.Errorf("Expected %d choices, got %d instead", 4, p.NumChoices)
	}

	// Choice labels
	label_checks := []struct {
		format   string
		expected string
		actual   string
	}{
		{"Expected choice1 %q, got %q", "Tribal armband", p.Choice1},
		{"Expected choice2 %q, got %q", "Marijuana leaf", p.Choice2},
		{"Expected choice3 %q, got %q", "Butterfly", p.Choice3},
		{"Expected choice4 %q, got %q", "Maple leaf", p.Choice4},
	}
	for _, check := range label_checks {
		if check.actual != check.expected {
			t.Errorf(check.format, check.expected, check.actual)
		}
	}

	// Vote counts, followed by the voting duration (the fixture gives 1440 minutes)
	count_checks := []struct {
		format   string
		expected int
		actual   int
	}{
		{"Expected Choice1_Votes %d, got %d", 1593, p.Choice1_Votes},
		{"Expected Choice2_Votes %d, got %d", 624, p.Choice2_Votes},
		{"Expected Choice3_Votes %d, got %d", 778, p.Choice3_Votes},
		{"Expected Choice4_Votes %d, got %d", 1138, p.Choice4_Votes},
		{"Expected voting duration %d seconds, got %d", 1440 * 60, p.VotingDuration},
	}
	for _, check := range count_checks {
		if check.actual != check.expected {
			t.Errorf(check.format, check.expected, check.actual)
		}
	}

	// Timestamps, compared as Unix seconds
	time_checks := []struct {
		format   string
		expected int64
		actual   int64
	}{
		{"Expected voting ends at %d (unix), got %d", 1638331934, p.VotingEndsAt.Unix()},
		{"Expected updated %d, got %d", 1638331935, p.LastUpdatedAt.Unix()},
	}
	for _, check := range time_checks {
		if check.actual != check.expected {
			t.Errorf(check.format, check.expected, check.actual)
		}
	}
}
func TestParseTweetResponse(t *testing.T) {
data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")