Add parsing of Twitter polls

This commit is contained in:
Alessio 2021-11-07 21:10:24 -08:00
parent 4793cf8a4d
commit 658f3c338f
5 changed files with 190 additions and 0 deletions

View File

@ -77,6 +77,44 @@ type APICard struct {
Url string `json:"url"`
} `json:"image_value"`
} `json:"player_image_large"`
// For polls
Choice1 struct {
StringValue string `json:"string_value"`
} `json:"choice1_label"`
Choice2 struct {
StringValue string `json:"string_value"`
} `json:"choice2_label"`
Choice3 struct {
StringValue string `json:"string_value"`
} `json:"choice3_label"`
Choice4 struct {
StringValue string `json:"string_value"`
} `json:"choice4_label"`
Choice1_Count struct {
StringValue string `json:"string_value"`
} `json:"choice1_count"`
Choice2_Count struct {
StringValue string `json:"string_value"`
} `json:"choice2_count"`
Choice3_Count struct {
StringValue string `json:"string_value"`
} `json:"choice3_count"`
Choice4_Count struct {
StringValue string `json:"string_value"`
} `json:"choice4_count"`
EndDatetimeUTC struct {
StringValue string `json:"string_value"`
} `json:"end_datetime_utc"`
CountsAreFinal struct {
BooleanValue bool `json:"boolean_value"`
} `json:"counts_are_final"`
DurationMinutes struct {
StringValue string `json:"string_value"`
} `json:"duration_minutes"`
} `json:"binding_values"`
}

70
scraper/poll.go Normal file
View File

@ -0,0 +1,70 @@
package scraper
import (
"time"
"strings"
"strconv"
)
// Poll holds the parsed contents of a poll card: up to four choice labels with
// their vote counts, plus timing information.
//
// ParseAPIPoll fills in NumChoices, the choice labels and vote counts,
// VotingDuration and VotingEndsAt. It does not set TweetID or LastScrapedAt.
type Poll struct {
// TweetID is not populated by ParseAPIPoll.
// NOTE(review): presumably the ID of the tweet the poll belongs to — confirm against callers.
TweetID TweetID
// NumChoices is the number of options, taken from the digit in the card
// name (e.g. "poll2choice_text_only" gives 2).
NumChoices int
// Choice1 and Choice2 (label and vote count) are always read from the card.
Choice1 string
Choice1_Votes int
Choice2 string
Choice2_Votes int
// Choice3 is only filled in when NumChoices > 2; otherwise it stays zero-valued.
Choice3 string
Choice3_Votes int
// Choice4 is only filled in when NumChoices > 3; otherwise it stays zero-valued.
Choice4 string
Choice4_Votes int
// VotingDuration is the card's "duration_minutes" value multiplied by 60.
VotingDuration int // In seconds
// VotingEndsAt is the card's "end_datetime_utc" value parsed as RFC 3339.
VotingEndsAt time.Time
// LastScrapedAt is not populated by ParseAPIPoll.
// NOTE(review): presumably set by whoever records the scrape time — confirm.
LastScrapedAt time.Time
}
// ParseAPIPoll converts a poll card from the API into a Poll.
//
// The number of choices comes from the card name, the voting duration is
// converted from minutes to seconds, and the end time is parsed as RFC 3339.
// Choices 3 and 4 are only read when the card name says they exist.
// TweetID and LastScrapedAt are left at their zero values.
//
// It panics if the end time, the card name, the duration or any vote count
// that is read cannot be parsed.
func ParseAPIPoll(apiCard APICard) Poll {
	bindings := apiCard.BindingValues

	ends_at, err := time.Parse(time.RFC3339, bindings.EndDatetimeUTC.StringValue)
	if err != nil {
		panic(err)
	}

	// The calls below are evaluated in lexical order, so a malformed card
	// panics on the same field as it would with one assignment per line.
	poll := Poll{
		NumChoices:     parse_num_choices(apiCard.Name),
		VotingDuration: int_or_panic(bindings.DurationMinutes.StringValue) * 60,
		VotingEndsAt:   ends_at,
		Choice1:        bindings.Choice1.StringValue,
		Choice1_Votes:  int_or_panic(bindings.Choice1_Count.StringValue),
		Choice2:        bindings.Choice2.StringValue,
		Choice2_Votes:  int_or_panic(bindings.Choice2_Count.StringValue),
	}

	// The third and fourth options are only touched when the card declares them.
	if poll.NumChoices >= 3 {
		poll.Choice3 = bindings.Choice3.StringValue
		poll.Choice3_Votes = int_or_panic(bindings.Choice3_Count.StringValue)
	}
	if poll.NumChoices >= 4 {
		poll.Choice4 = bindings.Choice4.StringValue
		poll.Choice4_Votes = int_or_panic(bindings.Choice4_Count.StringValue)
	}

	return poll
}
// parse_num_choices extracts the number of options from a poll card name of
// the form "poll<N>choice...", e.g. "poll2choice_text_only" yields 2.
//
// It panics if the name does not start with "poll", if the first occurrence
// of "choice" is not at byte offset 5, or if the character between them is
// not an integer.
func parse_num_choices(card_name string) int {
	has_poll_prefix := strings.HasPrefix(card_name, "poll")
	choice_at_offset_5 := strings.Index(card_name, "choice") == 5

	if has_poll_prefix && choice_at_offset_5 {
		// The single character between "poll" and "choice" is the count.
		return int_or_panic(card_name[4:5])
	}
	panic("Not valid card name: " + card_name)
}
// int_or_panic parses s as a base-10 integer with strconv.Atoi and returns it.
// If s is not a valid integer it panics with the error from strconv.Atoi.
func int_or_panic(s string) int {
	value, err := strconv.Atoi(s)
	if err == nil {
		return value
	}
	panic(err)
}

80
scraper/poll_test.go Normal file
View File

@ -0,0 +1,80 @@
package scraper_test
import (
"testing"
"io/ioutil"
"encoding/json"
"offline_twitter/scraper"
)
// TestParsePoll2Choices loads the two-option poll card fixture, decodes it
// into an APICard and checks every field that ParseAPIPoll fills in for it.
func TestParsePoll2Choices(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
	if err != nil {
		// Fail this test only; a panic would abort the whole test binary.
		t.Fatal(err.Error())
	}
	var apiCard scraper.APICard
	err = json.Unmarshal(data, &apiCard)
	if err != nil {
		t.Fatal(err.Error())
	}

	poll := scraper.ParseAPIPoll(apiCard)

	if poll.NumChoices != 2 {
		t.Errorf("Expected %d choices, got %d", 2, poll.NumChoices)
	}
	// The fixture's duration_minutes is "1440", i.e. one day in seconds.
	if poll.VotingDuration != 60*60*24 {
		t.Errorf("Expected duration %d, got %d", 60*60*24, poll.VotingDuration)
	}
	// Unix timestamp of the fixture's end_datetime_utc, "2021-11-08T18:46:41Z".
	expected_ending := int64(1636397201)
	if poll.VotingEndsAt.Unix() != expected_ending {
		t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
	}

	if poll.Choice1 != "Yes" || poll.Choice2 != "No" {
		t.Errorf("Expected %q and %q, got %q and %q", "Yes", "No", poll.Choice1, poll.Choice2)
	}
	if poll.Choice1_Votes != 529 {
		t.Errorf("Expected %d votes for choice 1, got %d", 529, poll.Choice1_Votes)
	}
	if poll.Choice2_Votes != 2182 {
		t.Errorf("Expected %d votes for choice 2, got %d", 2182, poll.Choice2_Votes)
	}
}
// TestParsePoll4Choices loads the four-option poll card fixture, decodes it
// into an APICard and checks that ParseAPIPoll fills in all four choices
// along with the duration and end time.
func TestParsePoll4Choices(t *testing.T) {
	data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
	if err != nil {
		// Fail this test only; a panic would abort the whole test binary.
		t.Fatal(err.Error())
	}
	var apiCard scraper.APICard
	err = json.Unmarshal(data, &apiCard)
	if err != nil {
		t.Fatal(err.Error())
	}

	poll := scraper.ParseAPIPoll(apiCard)

	if poll.NumChoices != 4 {
		t.Errorf("Expected %d choices, got %d", 4, poll.NumChoices)
	}
	// The fixture's duration_minutes is "1440", i.e. one day in seconds.
	if poll.VotingDuration != 60*60*24 {
		t.Errorf("Expected duration %d, got %d", 60*60*24, poll.VotingDuration)
	}
	// Unix timestamp of the fixture's end_datetime_utc, "2021-11-03T19:03:41Z".
	expected_ending := int64(1635966221)
	if poll.VotingEndsAt.Unix() != expected_ending {
		t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
	}

	// Labels are compared verbatim against the fixture, including its spelling.
	if poll.Choice1 != "Alec Baldwin" || poll.Choice1_Votes != 1669 {
		t.Errorf("Expected %q with %d, got %q with %d", "Alec Baldwin", 1669, poll.Choice1, poll.Choice1_Votes)
	}
	if poll.Choice2 != "Andew Cuomo" || poll.Choice2_Votes != 272 {
		t.Errorf("Expected %q with %d, got %q with %d", "Andew Cuomo", 272, poll.Choice2, poll.Choice2_Votes)
	}
	if poll.Choice3 != "George Floyd" || poll.Choice3_Votes != 829 {
		t.Errorf("Expected %q with %d, got %q with %d", "George Floyd", 829, poll.Choice3, poll.Choice3_Votes)
	}
	if poll.Choice4 != "Derek Chauvin" || poll.Choice4_Votes != 2397 {
		t.Errorf("Expected %q with %d, got %q with %d", "Derek Chauvin", 2397, poll.Choice4, poll.Choice4_Votes)
	}
}

View File

@ -0,0 +1 @@
{"name":"poll2choice_text_only","url":"card://1457419248461131776","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Yes"},"choice2_label":{"type":"STRING","string_value":"No"},"end_datetime_utc":{"type":"STRING","string_value":"2021-11-08T18:46:41Z"},"counts_are_final":{"type":"BOOLEAN"},"choice2_count":{"type":"STRING","string_value":"2182"},"choice1_count":{"type":"STRING","string_value":"529"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-11-07T20:59:15Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}

View File

@ -0,0 +1 @@
{"name":"poll4choice_text_only","url":"card://1455611588854140929","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Alec Baldwin"},"choice2_label":{"type":"STRING","string_value":"Andew Cuomo"},"end_datetime_utc":{"type":"STRING","string_value":"2021-11-03T19:03:41Z"},"counts_are_final":{"type":"BOOLEAN","boolean_value":true},"choice2_count":{"type":"STRING","string_value":"272"},"choice1_count":{"type":"STRING","string_value":"1669"},"choice4_label":{"type":"STRING","string_value":"Derek Chauvin"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-11-03T19:03:46Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"choice3_count":{"type":"STRING","string_value":"829"},"choice4_count":{"type":"STRING","string_value":"2397"},"choice3_label":{"type":"STRING","string_value":"George Floyd"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}