diff --git a/scraper/api_types.go b/scraper/api_types.go
index 42b0810..adaf77b 100644
--- a/scraper/api_types.go
+++ b/scraper/api_types.go
@@ -77,6 +77,44 @@ type APICard struct {
 				Url string `json:"url"`
 			} `json:"image_value"`
 		} `json:"player_image_large"`
+
+		// For polls
+		Choice1 struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice1_label"`
+		Choice2 struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice2_label"`
+		Choice3 struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice3_label"`
+		Choice4 struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice4_label"`
+
+		Choice1_Count struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice1_count"`
+		Choice2_Count struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice2_count"`
+		Choice3_Count struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice3_count"`
+		Choice4_Count struct {
+			StringValue string `json:"string_value"`
+		} `json:"choice4_count"`
+
+		EndDatetimeUTC struct {
+			StringValue string `json:"string_value"`
+		} `json:"end_datetime_utc"`
+		CountsAreFinal struct {
+			BooleanValue bool `json:"boolean_value"`
+		} `json:"counts_are_final"`
+		DurationMinutes struct {
+			StringValue string `json:"string_value"`
+		} `json:"duration_minutes"`
+
 	} `json:"binding_values"`
 }
 
diff --git a/scraper/poll.go b/scraper/poll.go
new file mode 100644
index 0000000..e0f760b
--- /dev/null
+++ b/scraper/poll.go
@@ -0,0 +1,91 @@
+package scraper
+
+import (
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Poll holds the contents of a poll card: up to four labelled choices with their
+// vote counts, plus the voting window.
+type Poll struct {
+	TweetID    TweetID
+	NumChoices int
+
+	Choice1       string
+	Choice1_Votes int
+	Choice2       string
+	Choice2_Votes int
+	Choice3       string
+	Choice3_Votes int
+	Choice4       string
+	Choice4_Votes int
+
+	VotingDuration int // In seconds
+	VotingEndsAt   time.Time
+
+	LastScrapedAt time.Time
+}
+
+// ParseAPIPoll converts a poll card from the API into a Poll.
+//
+// The number of choices comes from the card name; choices 3 and 4 are only read
+// when the card name says they exist.  TweetID and LastScrapedAt are not set here.
+//
+// Panics if the card name, the end time, the duration, or any vote count that is
+// read cannot be parsed.
+func ParseAPIPoll(apiCard APICard) Poll {
+	values := apiCard.BindingValues
+
+	voting_ends_at, err := time.Parse(time.RFC3339, values.EndDatetimeUTC.StringValue)
+	if err != nil {
+		panic(err)
+	}
+
+	ret := Poll{}
+	ret.NumChoices = parse_num_choices(apiCard.Name)
+	ret.VotingDuration = int_or_panic(values.DurationMinutes.StringValue) * 60 // Minutes -> seconds
+	ret.VotingEndsAt = voting_ends_at
+
+	ret.Choice1 = values.Choice1.StringValue
+	ret.Choice1_Votes = int_or_panic(values.Choice1_Count.StringValue)
+	ret.Choice2 = values.Choice2.StringValue
+	ret.Choice2_Votes = int_or_panic(values.Choice2_Count.StringValue)
+
+	if ret.NumChoices > 2 {
+		ret.Choice3 = values.Choice3.StringValue
+		ret.Choice3_Votes = int_or_panic(values.Choice3_Count.StringValue)
+	}
+	if ret.NumChoices > 3 {
+		ret.Choice4 = values.Choice4.StringValue
+		ret.Choice4_Votes = int_or_panic(values.Choice4_Count.StringValue)
+	}
+
+	return ret
+}
+
+// parse_num_choices extracts N from a poll card name of the form "poll<N>choice...",
+// e.g. "poll2choice_text_only" gives 2.
+//
+// Panics if the name does not have that form, or if N is outside 2..4: a Poll has
+// four choice slots and ParseAPIPoll always reads the first two.
+func parse_num_choices(card_name string) int {
+	if !strings.HasPrefix(card_name, "poll") || strings.Index(card_name, "choice") != 5 {
+		panic("Not valid card name: " + card_name)
+	}
+
+	num_choices := int_or_panic(card_name[4:5])
+	if num_choices < 2 || num_choices > 4 {
+		panic("Not valid card name: " + card_name)
+	}
+	return num_choices
+}
+
+// int_or_panic parses s as a base-10 integer and panics if it is not one.
+func int_or_panic(s string) int {
+	result, err := strconv.Atoi(s)
+	if err != nil {
+		panic(err)
+	}
+	return result
+}
diff --git a/scraper/poll_test.go b/scraper/poll_test.go
new file mode 100644
index 0000000..188ab6e
--- /dev/null
+++ b/scraper/poll_test.go
@@ -0,0 +1,84 @@
+package scraper_test
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"testing"
+
+	"offline_twitter/scraper"
+)
+
+// TestParsePoll2Choices parses the two-choice poll card fixture and checks the
+// choice count, voting window, labels and vote counts.
+func TestParsePoll2Choices(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_2_options.json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	var apiCard scraper.APICard
+	err = json.Unmarshal(data, &apiCard)
+	if err != nil {
+		t.Fatal(err.Error())
+	}
+
+	poll := scraper.ParseAPIPoll(apiCard)
+	if poll.NumChoices != 2 {
+		t.Errorf("Expected %d choices, got %d", 2, poll.NumChoices)
+	}
+	if poll.VotingDuration != 60*60*24 {
+		t.Errorf("Expected duration %d, got %d", 60*60*24, poll.VotingDuration)
+	}
+	expected_ending := int64(1636397201)
+	if poll.VotingEndsAt.Unix() != expected_ending {
+		t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
+	}
+
+	if poll.Choice1 != "Yes" || poll.Choice2 != "No" {
+		t.Errorf("Expected %q and %q, got %q and %q", "Yes", "No", poll.Choice1, poll.Choice2)
+	}
+	if poll.Choice1_Votes != 529 {
+		t.Errorf("Expected %d votes for choice 1, got %d", 529, poll.Choice1_Votes)
+	}
+	if poll.Choice2_Votes != 2182 {
+		t.Errorf("Expected %d votes for choice 2, got %d", 2182, poll.Choice2_Votes)
+	}
+}
+
+// TestParsePoll4Choices parses the four-choice poll card fixture and checks the
+// choice count, voting window, and every label with its vote count.
+func TestParsePoll4Choices(t *testing.T) {
+	data, err := ioutil.ReadFile("test_responses/tweet_content/poll_card_4_options_ended.json")
+	if err != nil {
+		t.Fatal(err)
+	}
+	var apiCard scraper.APICard
+	err = json.Unmarshal(data, &apiCard)
+	if err != nil {
+		t.Fatal(err.Error())
+	}
+
+	poll := scraper.ParseAPIPoll(apiCard)
+	if poll.NumChoices != 4 {
+		t.Errorf("Expected %d choices, got %d", 4, poll.NumChoices)
+	}
+	if poll.VotingDuration != 60*60*24 {
+		t.Errorf("Expected duration %d, got %d", 60*60*24, poll.VotingDuration)
+	}
+	expected_ending := int64(1635966221)
+	if poll.VotingEndsAt.Unix() != expected_ending {
+		t.Errorf("Expected closing time %d, got %d", expected_ending, poll.VotingEndsAt.Unix())
+	}
+
+	if poll.Choice1 != "Alec Baldwin" || poll.Choice1_Votes != 1669 {
+		t.Errorf("Expected %q with %d, got %q with %d", "Alec Baldwin", 1669, poll.Choice1, poll.Choice1_Votes)
+	}
+	if poll.Choice2 != "Andew Cuomo" || poll.Choice2_Votes != 272 {
+		t.Errorf("Expected %q with %d, got %q with %d", "Andew Cuomo", 272, poll.Choice2, poll.Choice2_Votes)
+	}
+	if poll.Choice3 != "George Floyd" || poll.Choice3_Votes != 829 {
+		t.Errorf("Expected %q with %d, got %q with %d", "George Floyd", 829, poll.Choice3, poll.Choice3_Votes)
+	}
+	if poll.Choice4 != "Derek Chauvin" || poll.Choice4_Votes != 2397 {
+		t.Errorf("Expected %q with %d, got %q with %d", "Derek Chauvin", 2397, poll.Choice4, poll.Choice4_Votes)
+	}
+}
diff --git a/scraper/test_responses/tweet_content/poll_card_2_options.json b/scraper/test_responses/tweet_content/poll_card_2_options.json
new file mode 100644
index 0000000..39862d0
--- /dev/null
+++ b/scraper/test_responses/tweet_content/poll_card_2_options.json
@@ -0,0 +1 @@
+{"name":"poll2choice_text_only","url":"card://1457419248461131776","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Yes"},"choice2_label":{"type":"STRING","string_value":"No"},"end_datetime_utc":{"type":"STRING","string_value":"2021-11-08T18:46:41Z"},"counts_are_final":{"type":"BOOLEAN"},"choice2_count":{"type":"STRING","string_value":"2182"},"choice1_count":{"type":"STRING","string_value":"529"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-11-07T20:59:15Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}
diff --git a/scraper/test_responses/tweet_content/poll_card_4_options_ended.json b/scraper/test_responses/tweet_content/poll_card_4_options_ended.json
new file mode 100644
index 0000000..85d0a1e
--- /dev/null
+++ b/scraper/test_responses/tweet_content/poll_card_4_options_ended.json
@@ -0,0 +1 @@
+{"name":"poll4choice_text_only","url":"card://1455611588854140929","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"choice1_label":{"type":"STRING","string_value":"Alec Baldwin"},"choice2_label":{"type":"STRING","string_value":"Andew Cuomo"},"end_datetime_utc":{"type":"STRING","string_value":"2021-11-03T19:03:41Z"},"counts_are_final":{"type":"BOOLEAN","boolean_value":true},"choice2_count":{"type":"STRING","string_value":"272"},"choice1_count":{"type":"STRING","string_value":"1669"},"choice4_label":{"type":"STRING","string_value":"Derek Chauvin"},"last_updated_datetime_utc":{"type":"STRING","string_value":"2021-11-03T19:03:46Z"},"duration_minutes":{"type":"STRING","string_value":"1440"},"choice3_count":{"type":"STRING","string_value":"829"},"choice4_count":{"type":"STRING","string_value":"2397"},"choice3_label":{"type":"STRING","string_value":"George Floyd"},"api":{"type":"STRING","string_value":"capi://passthrough/1"},"card_url":{"type":"STRING","string_value":"https://twitter.com","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}