diff --git a/persistence/media_queries.go b/persistence/media_queries.go
index 7947ee7..1224c8c 100644
--- a/persistence/media_queries.go
+++ b/persistence/media_queries.go
@@ -1,6 +1,8 @@
 package persistence
 
 import (
+	"time"
+
 	"offline_twitter/scraper"
 )
 
@@ -58,6 +60,37 @@ func (p Profile) SaveUrl(url scraper.Url) error {
 	return err
 }
 
+/**
+ * Save a Poll
+ *
+ * args:
+ * - poll: the Poll to store; `VotingEndsAt` and `LastUpdatedAt` are persisted as Unix timestamps (seconds)
+ *
+ * The `on conflict` clause is meant to refresh only the vote counts and `last_scraped_at` when
+ * the poll has already been saved.
+ *
+ * NOTE(review): the `polls` table declares no uniqueness constraint besides `rowid`, which this
+ * insert never supplies, so it looks like the `do update` branch cannot fire and every call adds
+ * a new row.  Confirm, then add a unique key and name it as the conflict target.
+ */
+func (p Profile) SavePoll(poll scraper.Poll) error {
+	_, err := p.DB.Exec(`
+		insert into polls (tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
+		            values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+		    on conflict do update
+		            set choice1_votes=?,
+		                choice2_votes=?,
+		                choice3_votes=?,
+		                choice4_votes=?,
+		                last_scraped_at=?
+		`,
+		poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3, poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(),
+		poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(),
+	)
+	return err
+}
+
+
 /**
  * Get the list of images for a tweet
  */
@@ -134,3 +167,43 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
 	}
 	return
 }
+
+/**
+ * Get the list of Polls for a Tweet
+ *
+ * args:
+ * - t: the tweet whose polls should be loaded; only `t.ID` is used
+ *
+ * returns: every row in `polls` whose `tweet_id` matches, with the timestamps converted back from Unix seconds
+ */
+func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
+	stmt, err := p.DB.Prepare("select num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at from polls where tweet_id=?")
+	if err != nil {
+		return
+	}
+	defer stmt.Close()
+	rows, err := stmt.Query(t.ID)
+	if err != nil {
+		return
+	}
+	// Release the result set even if a Scan fails part-way through
+	defer rows.Close()
+
+	for rows.Next() {
+		// Fresh values for every row, so nothing carries over from the previous one
+		var poll scraper.Poll
+		var voting_ends_at int64
+		var last_scraped_at int64
+		err = rows.Scan(&poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3, &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at)
+		if err != nil {
+			return
+		}
+		poll.TweetID = t.ID
+		poll.VotingEndsAt = time.Unix(voting_ends_at, 0)
+		poll.LastUpdatedAt = time.Unix(last_scraped_at, 0)
+		polls = append(polls, poll)
+	}
+	// rows.Next() also returns false when iteration fails, so surface that error
+	err = rows.Err()
+	return
+}
diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go
index 32fd012..d3610b4 100644
--- a/persistence/media_queries_test.go
+++ b/persistence/media_queries_test.go
@@ -244,3 +244,42 @@ func TestModifyUrl(t *testing.T) {
 		t.Error(diff)
 	}
 }
+
+
+/**
+ * Create a Poll, save it, reload it, and make sure it comes back the same
+ */
+func TestSaveAndLoadPoll(t *testing.T) {
+	profile_path := "test_profiles/TestMediaQueries"
+	profile := create_or_load_profile(profile_path)
+
+	tweet := create_stable_tweet()
+
+	poll := create_poll_from_id(rand.Int())
+	poll.TweetID = tweet.ID
+
+	// Save the Poll
+	err := profile.SavePoll(poll)
+	if err != nil {
+		t.Fatalf("Failed to save the poll: %s", err.Error())
+	}
+
+	// Reload the Poll
+	polls, err := profile.GetPollsForTweet(tweet)
+	if err != nil {
+		t.Fatalf("Could not load poll: %s", err.Error())
+	}
+
+	var new_poll scraper.Poll
+	for index := range polls {
+		if polls[index].Choice1 == poll.Choice1 {
+			new_poll = polls[index]
+		}
+	}
+	if new_poll.Choice1 != poll.Choice1 {
+		t.Fatalf("Could not find poll for some reason: %s, %s; %+v", new_poll.Choice1, poll.Choice1, polls)
+	}
+	if diff := deep.Equal(poll, new_poll); diff != nil {
+		t.Error(diff)
+	}
+}
diff --git a/persistence/schema.sql b/persistence/schema.sql
index 814e398..a80083d 100644
--- a/persistence/schema.sql
+++ b/persistence/schema.sql
@@ -81,6 +81,27 @@ create table urls (rowid integer primary key,
     foreign key(tweet_id) references tweets(id)
 );
 
+create table polls (rowid integer primary key,
+    tweet_id integer not null,
+    num_choices integer not null,
+
+    choice1 text,
+    choice1_votes integer,
+    choice2 text,
+    choice2_votes integer,
+    choice3 text,
+    choice3_votes integer,
+    choice4 text,
+    choice4_votes integer,
+
+    voting_duration integer not null,  -- in seconds
+    voting_ends_at integer not null,
+
+    last_scraped_at integer not null,
+
+    foreign key(tweet_id) references tweets(id)
+);
+
 create table images (rowid integer primary key,
     id integer unique not null check(typeof(id) = 'integer'),
     tweet_id integer not null,
diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go
index 85436e1..69e6676 100644
--- a/persistence/tweet_queries.go
+++ b/persistence/tweet_queries.go
@@ -57,6 +57,12 @@ func (p Profile) SaveTweet(t scraper.Tweet) error {
 			return err
 		}
 	}
+	for _, poll := range t.Polls {
+		err := p.SavePoll(poll)
+		if err != nil {
+			return err
+		}
+	}
 
 	err = tx.Commit()
 	if err != nil {
@@ -138,6 +144,12 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) {
 	}
 	t.Videos = vids
 
+	polls, err := p.GetPollsForTweet(t)
+	if err != nil {
+		return t, err
+	}
+	t.Polls = polls
+
 	urls, err := p.GetUrlsForTweet(t)
 	t.Urls = urls
 
diff --git a/persistence/utils_test.go b/persistence/utils_test.go
index b3fe82f..f8d73dc 100644
--- a/persistence/utils_test.go
+++ b/persistence/utils_test.go
@@ -119,6 +119,26 @@ func create_url_from_id(id int) scraper.Url {
 	}
 }
 
+
+/**
+ * Create a semi-stable Poll based on the given ID
+ */
+func create_poll_from_id(id int) scraper.Poll {
+	s := fmt.Sprint(id)
+	return scraper.Poll{
+		TweetID: -1,
+		NumChoices: 2,
+		Choice1: s,
+		Choice1_Votes: 1000,
+		Choice2: "Not " + s,
+		Choice2_Votes: 1500,
+		VotingDuration: 10,
+		VotingEndsAt: time.Unix(10000000, 0),
+		LastUpdatedAt: time.Unix(10000, 0),
+	}
+}
+
+
 /**
  * Create a stable tweet with a fixed ID and content
  */
@@ -144,6 +164,9 @@ func create_stable_tweet() scraper.Tweet {
 		},
 		Mentions: []scraper.UserHandle{},
 		Hashtags: []string{},
+		Polls: []scraper.Poll{
+			create_poll_from_id(-1),
+		},
 	}
 }
 
@@ -207,6 +230,9 @@ func create_dummy_tweet() scraper.Tweet {
 	url2 := create_url_from_id(rand.Int())
 	url2.TweetID = tweet_id
 
+	poll := create_poll_from_id(rand.Int())
+	poll.TweetID = tweet_id
+
 	return scraper.Tweet{
 		ID: tweet_id,
 		UserID: -1,
@@ -222,6 +248,7 @@ func create_dummy_tweet() scraper.Tweet {
 		Mentions: []scraper.UserHandle{"mention1", "mention2"},
 		ReplyMentions: []scraper.UserHandle{"replymention1", "replymention2"},
 		Hashtags: []string{"hash1", "hash2"},
+		Polls: []scraper.Poll{poll},
 	}
 }
 
diff --git a/scraper/test_responses/single_tweets/tweet_with_poll_4_choices.json b/scraper/test_responses/single_tweets/tweet_with_poll_4_choices.json
new file mode 100644
index 0000000..2b204ae
--- /dev/null
+++ b/scraper/test_responses/single_tweets/tweet_with_poll_4_choices.json
@@ -0,0 +1 @@
+{"created_at":"Tue Nov 30 04:12:14 +0000 2021","id_str":"1465534109573390348","full_text":"Which of these tattoos would you get if you had to get one on your arm?","display_text_range":[0,71],"entities":{},"source":"Twitter for Android","user_id_str":"44067298","retweet_count":13,"favorite_count":118,"reply_count":1,"quote_count":18,"conversation_id_str":"1465534109573390348","conversation_control":{"policy":"community","conversation_owner":{"screen_name":"michaelmalice"}},"possibly_sensitive_editable":true,"card":{"name":"poll4choice_text_only","url":"card://1465534108923314180","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Tribal armband"},"choice2_label":{"type":"STRING","string_value":"Marijuana leaf"},"end_datetime_utc":{"type":"STRING","string_value":"2021-12-01T04:12:14Z"},"counts_are_final":{"type":"BOOLEAN","boolean_value":true},"choice2_count":{"type":"STRING","string_value":"624"},"choice1_count":{"type":"STRING","string_value":"1593"},"choice4_label":{"type":"STRING","string_value":"Maple leaf"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-12-01T04:12:15Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"choice3_count":{"type":"STRING","string_value":"778"},"choice4_count":{"type":"STRING","string_value":"1138"},"choice3_label":{"type":"STRING","string_value":"Butterfly"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}},"lang":"en","self_thread":{"id_str":"1465534109573390348"}}
diff --git a/scraper/tweet.go b/scraper/tweet.go
index ceaa29f..bac5dca 100644
--- a/scraper/tweet.go
+++ b/scraper/tweet.go
@@ -25,12 +25,13 @@ type Tweet struct {
 	InReplyToID   TweetID
 	QuotedTweetID TweetID
 
-	Urls          []Url
 	Images        []Image
 	Videos        []Video
 	Mentions      []UserHandle
 	ReplyMentions []UserHandle
 	Hashtags      []string
+	Urls          []Url
+	Polls         []Poll
 
 	TombstoneType string
 	IsStub        bool
@@ -84,6 +85,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 	ret.UserID = UserID(apiTweet.UserID)
 	ret.Text = apiTweet.FullText
 
+	// Process "posted-at" date and time
 	if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
 		ret.PostedAt, err = time.Parse(time.RubyDate, apiTweet.CreatedAt)
 		if err != nil {
@@ -97,6 +99,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 	ret.NumQuoteTweets = apiTweet.QuoteCount
 	ret.InReplyToID = TweetID(apiTweet.InReplyToStatusID)
 
+	// Process URLs and link previews
 	for _, url := range apiTweet.Entities.URLs {
 		var url_object Url
 		if apiTweet.Card.ShortenedUrl == url.ShortenedUrl {
@@ -107,6 +110,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 		ret.Urls = append(ret.Urls, url_object)
 	}
 
+	// Process images
 	for _, media := range apiTweet.Entities.Media {
 		if media.Type != "photo" { // TODO: remove this eventually
 			panic_str := fmt.Sprintf("Unknown media type: %q", media.Type)
@@ -116,13 +120,16 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 		new_image.TweetID = ret.ID
 		ret.Images = append(ret.Images, new_image)
 	}
+
+	// Process hashtags
 	for _, hashtag := range apiTweet.Entities.Hashtags {
 		ret.Hashtags = append(ret.Hashtags, hashtag.Text)
 	}
+
+	// Process `@` mentions and reply-mentions
 	for _, mention := range apiTweet.Entities.Mentions {
 		ret.Mentions = append(ret.Mentions, UserHandle(mention.UserName))
 	}
-
 	for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
 		if mention != "" {
 			if mention[0] != '@' {
@@ -134,6 +141,7 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 
 	ret.QuotedTweetID = TweetID(apiTweet.QuotedStatusID)
 
+	// Process videos
 	for _, entity := range apiTweet.ExtendedEntities.Media {
 		if entity.Type != "video" && entity.Type != "animated_gif" {
 			continue
@@ -146,6 +154,14 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
 		ret.Images = []Image{}
 	}
 
+	// Process polls
+	if strings.HasPrefix(apiTweet.Card.Name, "poll") {
+		poll := ParseAPIPoll(apiTweet.Card)
+		poll.TweetID = ret.ID
+		ret.Polls = []Poll{poll}
+	}
+
+	// Process tombstones
 	ret.TombstoneType = apiTweet.TombstoneText
 	ret.IsStub = !(ret.TombstoneType == "")
 
diff --git a/scraper/tweet_test.go b/scraper/tweet_test.go
index 680aeea..896279f 100644
--- a/scraper/tweet_test.go
+++ b/scraper/tweet_test.go
@@ -51,6 +51,10 @@ func TestParseSingleTweet(t *testing.T) {
 	if tweet.QuotedTweetID != 0 {
 		t.Errorf("Incorrectly believes it quote-tweets tweet with ID %d", tweet.QuotedTweetID)
 	}
+
+	if len(tweet.Polls) != 0 {
+		t.Errorf("Should not have any polls")
+	}
 }
 
 func TestParseTweetWithImage(t *testing.T) {
@@ -85,6 +89,10 @@ func TestParseTweetWithQuotedTweetAsLink(t *testing.T) {
 	if tweet.QuotedTweetID != expected_quoted_id {
 		t.Errorf("Should be a quoted tweet with ID %d, but got %d instead", expected_quoted_id, tweet.QuotedTweetID)
 	}
+
+	if len(tweet.Polls) != 0 {
+		t.Errorf("Should not have any polls")
+	}
 }
 
 func TestParseTweetWithVideo(t *testing.T) {
@@ -136,6 +144,10 @@ func TestParseTweetWithUrl(t *testing.T) {
 	if tweet.Urls[0].Domain != expected_url_domain {
 		t.Errorf("Expected Url text to be %q, but got %q", expected_url_domain, tweet.Urls[0].Domain)
 	}
+
+	if len(tweet.Polls) != 0 {
+		t.Errorf("Should not have any polls")
+	}
 }
 
 func TestParseTweetWithUrlButNoCard(t *testing.T) {
@@ -173,6 +185,10 @@ func TestParseTweetWithMultipleUrls(t *testing.T) {
 	if tweet.Urls[2].Title != expected_title {
 		t.Errorf("Expected title to be %q, but got %q", expected_title, tweet.Urls[2].Title)
 	}
+
+	if len(tweet.Polls) != 0 {
+		t.Errorf("Should not have any polls")
+	}
 }
 
 func TestTweetWithLotsOfReplyMentions(t *testing.T) {
@@ -188,6 +204,68 @@ func TestTweetWithLotsOfReplyMentions(t *testing.T) {
 	}
 }
 
+func TestTweetWithPoll(t *testing.T) {
+	tweet := load_tweet_from_file("test_responses/single_tweets/tweet_with_poll_4_choices.json")
+
+	if len(tweet.Polls) != 1 {
+		t.Fatalf("Expected there to be 1 poll, but there was %d", len(tweet.Polls))
+	}
+	p := tweet.Polls[0]
+
+	if p.TweetID != tweet.ID {
+		t.Errorf("Poll's TweetID (%d) should match the tweet's ID (%d)", p.TweetID, tweet.ID)
+	}
+	if p.NumChoices != 4 {
+		t.Errorf("Expected %d choices, got %d instead", 4, p.NumChoices)
+	}
+	expected_choice1 := "Tribal armband"
+	if p.Choice1 != expected_choice1 {
+		t.Errorf("Expected choice1 %q, got %q", expected_choice1, p.Choice1)
+	}
+	expected_choice2 := "Marijuana leaf"
+	if p.Choice2 != expected_choice2 {
+		t.Errorf("Expected choice2 %q, got %q", expected_choice2, p.Choice2)
+	}
+	expected_choice3 := "Butterfly"
+	if p.Choice3 != expected_choice3 {
+		t.Errorf("Expected choice3 %q, got %q", expected_choice3, p.Choice3)
+	}
+	expected_choice4 := "Maple leaf"
+	if p.Choice4 != expected_choice4 {
+		t.Errorf("Expected choice4 %q, got %q", expected_choice4, p.Choice4)
+	}
+
+	expected_votes1 := 1593
+	expected_votes2 := 624
+	expected_votes3 := 778
+	expected_votes4 := 1138
+	if p.Choice1_Votes != expected_votes1 {
+		t.Errorf("Expected Choice1_Votes %d, got %d", expected_votes1, p.Choice1_Votes)
+	}
+	if p.Choice2_Votes != expected_votes2 {
+		t.Errorf("Expected Choice2_Votes %d, got %d", expected_votes2, p.Choice2_Votes)
+	}
+	if p.Choice3_Votes != expected_votes3 {
+		t.Errorf("Expected Choice3_Votes %d, got %d", expected_votes3, p.Choice3_Votes)
+	}
+	if p.Choice4_Votes != expected_votes4 {
+		t.Errorf("Expected Choice4_Votes %d, got %d", expected_votes4, p.Choice4_Votes)
+	}
+
+	expected_duration := 1440 * 60
+	if p.VotingDuration != expected_duration {
+		t.Errorf("Expected voting duration %d seconds, got %d", expected_duration, p.VotingDuration)
+	}
+	expected_ends_at := int64(1638331934)
+	if p.VotingEndsAt.Unix() != expected_ends_at {
+		t.Errorf("Expected voting ends at %d (unix), got %d", expected_ends_at, p.VotingEndsAt.Unix())
+	}
+	expected_last_updated_at := int64(1638331935)
+	if p.LastUpdatedAt.Unix() != expected_last_updated_at {
+		t.Errorf("Expected updated %d, got %d", expected_last_updated_at, p.LastUpdatedAt.Unix())
+	}
+}
+
 func TestParseTweetResponse(t *testing.T) {
 	data, err := ioutil.ReadFile("test_responses/michael_malice_feed.json")