Add parsing of the new GraphQL Twitter API for user feeds
This commit is contained in:
parent
8250725d2c
commit
fb421215df
310
scraper/api_types_v2.go
Normal file
310
scraper/api_types_v2.go
Normal file
@ -0,0 +1,310 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"io/ioutil"
|
||||
"time"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type APIV2Tweet struct {
|
||||
APITweet
|
||||
RetweetedStatusResult struct {
|
||||
Result struct {
|
||||
ID int `json:"rest_id,string"`
|
||||
Legacy APITweet `json:"legacy"`
|
||||
Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIUser `json:"legacy"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
} `json:"core"`
|
||||
QuotedStatusResult struct {
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APITweet `json:"legacy"`
|
||||
Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIUser `json:"legacy"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
} `json:"core"`
|
||||
} `json:"result"`
|
||||
} `json:"quoted_status_result"`
|
||||
} `json:"result"`
|
||||
} `json:"retweeted_status_result"`
|
||||
}
|
||||
|
||||
type APIV2Response struct {
|
||||
Data struct {
|
||||
User struct {
|
||||
Result struct {
|
||||
Timeline struct {
|
||||
Timeline struct {
|
||||
Instructions []struct {
|
||||
Type string `json:"type"`
|
||||
Entries []struct {
|
||||
EntryID string `json:"entryId"`
|
||||
SortIndex int64 `json:"sortIndex,string"`
|
||||
Content struct {
|
||||
ItemContent struct {
|
||||
EntryType string `json:"entryType"`
|
||||
TweetResults struct {
|
||||
Result struct {
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIUser `json:"legacy"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
} `json:"core"`
|
||||
QuotedStatusResult struct { // Same as "Result"
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIV2Tweet `json:"legacy"`
|
||||
Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
ID int64 `json:"rest_id,string"`
|
||||
Legacy APIUser `json:"legacy"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
} `json:"core"`
|
||||
} `json:"result"`
|
||||
} `json:"quoted_status_result"`
|
||||
} `json:"result"`
|
||||
} `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
|
||||
// Cursors
|
||||
EntryType string `json:"entryType"`
|
||||
Value string `json:"value"`
|
||||
CursorType string `json:"cursorType"`
|
||||
|
||||
} `json:"content"`
|
||||
} `json:"entries"`
|
||||
} `json:"instructions"`
|
||||
} `json:"timeline"`
|
||||
} `json:"timeline"`
|
||||
} `json:"result"`
|
||||
} `json:"user"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (api_response APIV2Response) GetCursorBottom() string {
|
||||
entries := api_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries
|
||||
last_entry := entries[len(entries) - 1]
|
||||
if last_entry.Content.CursorType != "Bottom" {
|
||||
panic("No bottom cursor found")
|
||||
}
|
||||
|
||||
return last_entry.Content.Value
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the collected API response and turn it into a TweetTrove
|
||||
*/
|
||||
func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
||||
ret := NewTweetTrove()
|
||||
for _, entry := range api_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries { // TODO: the second Instruction is the pinned tweet
|
||||
if !strings.HasPrefix(entry.EntryID, "tweet-") {
|
||||
// println(entry.EntryID)
|
||||
continue
|
||||
}
|
||||
|
||||
result := entry.Content.ItemContent.TweetResults.Result
|
||||
apiv2_tweet := result.Legacy
|
||||
apiv2_user_result := result.Core.UserResults.Result
|
||||
apiv2_retweeted_tweet_result := apiv2_tweet.RetweetedStatusResult.Result
|
||||
apiv2_retweeted_tweet_user := apiv2_retweeted_tweet_result.Core.UserResults.Result
|
||||
apiv2_retweeted_quoted_result := apiv2_retweeted_tweet_result.QuotedStatusResult.Result
|
||||
apiv2_retweeted_quoted_user := apiv2_retweeted_quoted_result.Core.UserResults.Result
|
||||
apiv2_quoted_tweet_result := result.QuotedStatusResult.Result
|
||||
apiv2_quoted_user_result := apiv2_quoted_tweet_result.Core.UserResults.Result
|
||||
|
||||
// Handle case of retweet (main tweet doesn't get parsed other than retweeted_at)
|
||||
if apiv2_retweeted_tweet_result.ID != 0 {
|
||||
orig_tweet, err := ParseSingleTweet(apiv2_retweeted_tweet_result.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
ret.Tweets[orig_tweet.ID] = orig_tweet
|
||||
|
||||
orig_user, err := ParseSingleUser(apiv2_retweeted_tweet_user.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
orig_user.ID = UserID(apiv2_retweeted_tweet_user.ID)
|
||||
ret.Users[orig_user.ID] = orig_user
|
||||
|
||||
retweeting_user, err := ParseSingleUser(apiv2_user_result.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
retweeting_user.ID = UserID(apiv2_user_result.ID)
|
||||
ret.Users[retweeting_user.ID] = retweeting_user
|
||||
|
||||
retweet := Retweet{}
|
||||
retweet.RetweetID = TweetID(apiv2_tweet.ID)
|
||||
retweet.TweetID = TweetID(orig_tweet.ID)
|
||||
retweet.RetweetedByID = retweeting_user.ID
|
||||
retweet.RetweetedAt, err = time.Parse(time.RubyDate, apiv2_tweet.CreatedAt)
|
||||
if err != nil {
|
||||
fmt.Printf("%v\n", apiv2_tweet)
|
||||
panic(err)
|
||||
}
|
||||
ret.Retweets[retweet.RetweetID] = retweet
|
||||
|
||||
// Handle quoted tweet
|
||||
if apiv2_retweeted_quoted_result.ID != 0 {
|
||||
quoted_tweet, err := ParseSingleTweet(apiv2_retweeted_quoted_result.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
ret.Tweets[quoted_tweet.ID] = quoted_tweet
|
||||
|
||||
quoted_user, err := ParseSingleUser(apiv2_retweeted_quoted_user.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
quoted_user.ID = UserID(apiv2_retweeted_quoted_user.ID)
|
||||
ret.Users[quoted_user.ID] = quoted_user
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// The main tweet
|
||||
tweet, err := ParseSingleTweet(apiv2_tweet.APITweet)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
ret.Tweets[tweet.ID] = tweet
|
||||
|
||||
user, err := ParseSingleUser(apiv2_user_result.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
user.ID = UserID(apiv2_user_result.ID)
|
||||
ret.Users[user.ID] = user
|
||||
|
||||
// Handle quoted tweet
|
||||
if apiv2_quoted_tweet_result.ID != 0 {
|
||||
quoted_tweet, err := ParseSingleTweet(apiv2_quoted_tweet_result.Legacy.APITweet)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
ret.Tweets[quoted_tweet.ID] = quoted_tweet
|
||||
|
||||
quoted_user, err := ParseSingleUser(apiv2_quoted_user_result.Legacy)
|
||||
if err != nil {
|
||||
return TweetTrove{}, err
|
||||
}
|
||||
quoted_user.ID = UserID(apiv2_quoted_user_result.ID)
|
||||
ret.Users[quoted_user.ID] = quoted_user
|
||||
}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
|
||||
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
||||
if cursor != "" {
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D"
|
||||
}
|
||||
return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D"
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a User feed using the new GraphQL twitter api
|
||||
*/
|
||||
func (api API) GetGraphqlFeedFor(user_id UserID, cursor string) (APIV2Response, error) {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", get_graphql_user_timeline_url(user_id, cursor), nil)
|
||||
if err != nil {
|
||||
return APIV2Response{}, err
|
||||
}
|
||||
|
||||
err = ApiRequestAddTokens(req)
|
||||
if err != nil {
|
||||
return APIV2Response{}, err
|
||||
}
|
||||
|
||||
if cursor != "" {
|
||||
UpdateQueryCursor(req, cursor, false)
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return APIV2Response{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
content, _ := ioutil.ReadAll(resp.Body)
|
||||
s := ""
|
||||
for header := range resp.Header {
|
||||
s += fmt.Sprintf(" %s: %s\n", header, resp.Header.Get(header))
|
||||
}
|
||||
return APIV2Response{}, fmt.Errorf("HTTP %s\n%s\n%s", resp.Status, s, content)
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return APIV2Response{}, err
|
||||
}
|
||||
fmt.Println(string(body))
|
||||
|
||||
var response APIV2Response
|
||||
err = json.Unmarshal(body, &response)
|
||||
return response, err
|
||||
}
|
||||
|
||||
/**
|
||||
* Resend the request to get more tweets if necessary
|
||||
*
|
||||
* args:
|
||||
* - user_id: the user's UserID
|
||||
* - response: an "out" parameter; the APIV2Response that tweets, RTs and users will be appended to
|
||||
* - min_tweets: the desired minimum amount of tweets to get
|
||||
*/
|
||||
func (api API) GetMoreTweetsFromGraphqlFeed(user_id UserID, response *APIV2Response, min_tweets int) error {
|
||||
// TODO user-feed-infinite-fetch: what if you reach the end of the user's timeline? Might loop
|
||||
// forever getting no new tweets
|
||||
last_response := response
|
||||
for last_response.GetCursorBottom() != "" && len(response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries) < min_tweets {
|
||||
fresh_response, err := api.GetGraphqlFeedFor(user_id, last_response.GetCursorBottom())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if fresh_response.GetCursorBottom() == last_response.GetCursorBottom() && len(fresh_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries) == 0 {
|
||||
// Empty response, cursor same as previous: end of feed has been reached
|
||||
return END_OF_FEED
|
||||
}
|
||||
if len(fresh_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries) == 0 {
|
||||
// Response has a pinned tweet, but no other content: end of feed has been reached
|
||||
return END_OF_FEED // TODO: check that there actually is a pinned tweet and the request didn't just fail lol
|
||||
}
|
||||
|
||||
last_response = &fresh_response
|
||||
|
||||
// Copy over the entries
|
||||
response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries = append(
|
||||
response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries,
|
||||
last_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries...)
|
||||
|
||||
fmt.Printf("Have %d entries so far\n", len(response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries))
|
||||
}
|
||||
return nil
|
||||
}
|
309
scraper/api_types_v2_test.go
Normal file
309
scraper/api_types_v2_test.go
Normal file
@ -0,0 +1,309 @@
|
||||
package scraper_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"io/ioutil"
|
||||
"encoding/json"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
|
||||
// Check a plain old tweet
|
||||
func TestAPIV2FeedSimpleTweet(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/feeds_api_v2/feed_simple_tweet.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var feed scraper.APIV2Response
|
||||
err = json.Unmarshal(data, &feed)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
|
||||
tweet_trove, err := feed.ToTweetTrove()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if len(tweet_trove.Users) != 1 {
|
||||
t.Errorf("Expected 1 user, got %d", len(tweet_trove.Users))
|
||||
}
|
||||
user := tweet_trove.Users[44067298]
|
||||
if user.ID != 44067298 {
|
||||
t.Errorf("Expected ID %d, got %d", 44067298, user.ID)
|
||||
}
|
||||
if user.DisplayName != "Michael Malice" {
|
||||
t.Errorf("Expected display name %q, got %q", "Michael Malice", user.DisplayName)
|
||||
}
|
||||
|
||||
|
||||
if len(tweet_trove.Tweets) != 1 {
|
||||
t.Errorf("Expected %d tweets, got %d", 1, len(tweet_trove.Tweets))
|
||||
}
|
||||
tweet := tweet_trove.Tweets[1485708879174508550]
|
||||
if tweet.ID != 1485708879174508550 {
|
||||
t.Errorf("Expected ID 1485708879174508550, got %d", tweet.ID)
|
||||
}
|
||||
if tweet.UserID != scraper.UserID(44067298) {
|
||||
t.Errorf("Expected user ID 44067298, got %d", tweet.UserID)
|
||||
}
|
||||
expected_text := "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964"
|
||||
if tweet.Text != expected_text {
|
||||
t.Errorf("Expected text: %q, got: %q", expected_text, tweet.Text)
|
||||
}
|
||||
|
||||
if len(tweet_trove.Retweets) != 0 {
|
||||
t.Errorf("Shouldn't be any retweets")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check a retweet
|
||||
func TestAPIV2FeedRetweet(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/feeds_api_v2/feed_simple_retweet.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var feed scraper.APIV2Response
|
||||
err = json.Unmarshal(data, &feed)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
|
||||
tweet_trove, err := feed.ToTweetTrove()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Should fetch both the retweeting and retweeted users
|
||||
if len(tweet_trove.Users) != 2 {
|
||||
t.Errorf("Expected %d users, got %d", 2, len(tweet_trove.Users))
|
||||
}
|
||||
user := tweet_trove.Users[44067298]
|
||||
if user.ID != 44067298 {
|
||||
t.Errorf("Expected ID %d, got %d", 44067298, user.ID)
|
||||
}
|
||||
if user.DisplayName != "Michael Malice" {
|
||||
t.Errorf("Expected display name %q, got %q", "Michael Malice", user.DisplayName)
|
||||
}
|
||||
|
||||
retweeted_user := tweet_trove.Users[1326229737551912960]
|
||||
if retweeted_user.ID != 1326229737551912960 {
|
||||
t.Errorf("Expected ID %d, got %d", 1326229737551912960, retweeted_user.ID)
|
||||
}
|
||||
if retweeted_user.Handle != "libsoftiktok" {
|
||||
t.Errorf("Expected handle %q, got %q", "libsoftiktok", retweeted_user.Handle)
|
||||
}
|
||||
|
||||
// Should only be 1 tweet, the retweeted one
|
||||
if len(tweet_trove.Tweets) != 1 {
|
||||
t.Errorf("Expected %d tweets, got %d", 1, len(tweet_trove.Tweets))
|
||||
}
|
||||
tweet, ok := tweet_trove.Tweets[1485694028620316673]
|
||||
if !ok {
|
||||
t.Fatalf("Didn't get the tweet")
|
||||
}
|
||||
if tweet.ID != 1485694028620316673 {
|
||||
t.Errorf("Expected ID %d, got %d", 1485694028620316673, tweet.ID)
|
||||
}
|
||||
if tweet.UserID != scraper.UserID(1326229737551912960) {
|
||||
t.Errorf("Expected user ID %d, got %d", 1326229737551912960, tweet.UserID)
|
||||
}
|
||||
expected_text := "More mask madness, this time in an elevator. The mask police are really nuts https://t.co/3BpvLjdJwD"
|
||||
if tweet.Text != expected_text {
|
||||
t.Errorf("Expected text: %q, got: %q", expected_text, tweet.Text)
|
||||
}
|
||||
|
||||
// Should be 1 retweet
|
||||
if len(tweet_trove.Retweets) != 1 {
|
||||
t.Errorf("Expected %d retweets, got %d", 1, len(tweet_trove.Retweets))
|
||||
}
|
||||
retweet := tweet_trove.Retweets[1485699748514476037]
|
||||
if retweet.RetweetID != 1485699748514476037 {
|
||||
t.Errorf("Expected RetweetID %d, got %d", 1485699748514476037, retweet.RetweetID)
|
||||
}
|
||||
if retweet.TweetID != 1485694028620316673 {
|
||||
t.Errorf("Expected TweetID 1485694028620316673, got %d", retweet.TweetID)
|
||||
}
|
||||
if retweet.RetweetedAt.Unix() != 1643053397 {
|
||||
t.Errorf("Expected retweeted_at %d, got %d", 1643053397, retweet.RetweetedAt.Unix())
|
||||
}
|
||||
if retweet.RetweetedByID != scraper.UserID(44067298) {
|
||||
t.Errorf("Expected retweeted_by 44067298, got %d", retweet.RetweetedByID)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check a quote-tweet
|
||||
func TestAPIV2FeedQuoteTweet(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/feeds_api_v2/feed_quote_tweet.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var feed scraper.APIV2Response
|
||||
err = json.Unmarshal(data, &feed)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
|
||||
tweet_trove, err := feed.ToTweetTrove()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Should be 2 users: quoter and quoted
|
||||
if len(tweet_trove.Users) != 2 {
|
||||
t.Errorf("Expected %d users, got %d", 2, len(tweet_trove.Users))
|
||||
}
|
||||
quoting_user := tweet_trove.Users[44067298]
|
||||
if quoting_user.ID != 44067298 {
|
||||
t.Errorf("Expected quoting user ID %d, got %d", 44067298, quoting_user.ID)
|
||||
}
|
||||
quoted_user := tweet_trove.Users[892155218292617217]
|
||||
if quoted_user.ID != 892155218292617217 {
|
||||
t.Errorf("Expected quoted user ID %d, got %d", 892155218292617217, quoted_user.ID)
|
||||
}
|
||||
expected_quoted_bio := "Creator of Little Homes and Mooncars"
|
||||
if quoted_user.Bio != expected_quoted_bio {
|
||||
t.Errorf("Expected bio %q, got %q", expected_quoted_bio, quoted_user.Bio)
|
||||
}
|
||||
|
||||
|
||||
// Should be 2 tweets: quote-tweet and quoted-tweet
|
||||
if len(tweet_trove.Tweets) != 2 {
|
||||
t.Errorf("Expected %d tweets, got %d", 2, len(tweet_trove.Tweets))
|
||||
}
|
||||
quoted_tweet := tweet_trove.Tweets[1485690069079846915]
|
||||
if quoted_tweet.ID != 1485690069079846915 {
|
||||
t.Errorf("Expected quoted ID %d, got %d", 1485690069079846915, quoted_tweet.ID)
|
||||
}
|
||||
expected_quoted_text := "The Left hates the Right so much that they won't let them leave the Union. I don't get it."
|
||||
if quoted_tweet.Text != expected_quoted_text {
|
||||
t.Errorf("Expected text %q, got %q", expected_quoted_text, quoted_tweet.Text)
|
||||
}
|
||||
quote_tweet := tweet_trove.Tweets[1485690410899021826]
|
||||
if quote_tweet.ID != 1485690410899021826 {
|
||||
t.Errorf("Expected quoting ID %d, got %d", 1485690410899021826, quote_tweet.ID)
|
||||
}
|
||||
if quote_tweet.QuotedTweetID != 1485690069079846915 {
|
||||
t.Errorf("Expected to be quoting tweet ID %d, got %d", 1485690069079846915, quote_tweet.QuotedTweetID)
|
||||
}
|
||||
|
||||
|
||||
// No retweets
|
||||
if len(tweet_trove.Retweets) != 0 {
|
||||
t.Errorf("Shouldn't be any retweets")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check a retweeted quote-tweet
|
||||
func TestAPIV2FeedRetweetedQuoteTweet(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/feeds_api_v2/feed_retweeted_quote_tweet.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var feed scraper.APIV2Response
|
||||
err = json.Unmarshal(data, &feed)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
|
||||
tweet_trove, err := feed.ToTweetTrove()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// 3 Users: quoted, quoter, and retweeter
|
||||
if len(tweet_trove.Users) != 3 {
|
||||
t.Errorf("Expected %d users, got %d", 3, len(tweet_trove.Users))
|
||||
}
|
||||
retweeting_user := tweet_trove.Users[599817378]
|
||||
if retweeting_user.ID != 599817378 {
|
||||
t.Errorf("Expected retweeting user ID %d, got %d", 599817378, retweeting_user.ID)
|
||||
}
|
||||
if retweeting_user.Website != "https://www.youtube.com/highlyrespected" {
|
||||
t.Errorf("Expected RTing user website %q, got %q", "https://www.youtube.com/highlyrespected", retweeting_user.Website)
|
||||
}
|
||||
retweeted_user := tweet_trove.Users[1434720042193760256]
|
||||
if retweeted_user.ID != 1434720042193760256 {
|
||||
t.Errorf("Expected retweed user ID %d, got %d", 1434720042193760256, retweeted_user.ID)
|
||||
}
|
||||
if retweeted_user.FollowersCount != 17843 {
|
||||
t.Errorf("Expected %d followers, got %d", 17843, retweeted_user.FollowersCount)
|
||||
}
|
||||
quoted_user := tweet_trove.Users[14347972]
|
||||
if quoted_user.ID != 14347972 {
|
||||
t.Errorf("Expected quoted user ID %d, got %d", 14347972, quoted_user.ID)
|
||||
}
|
||||
if quoted_user.IsVerified != true {
|
||||
t.Errorf("Expected quoted user to be verified")
|
||||
}
|
||||
|
||||
|
||||
// Quoted tweet and quoting tweet
|
||||
if len(tweet_trove.Tweets) != 2 {
|
||||
t.Errorf("Expected %d tweets, got %d", 2, len(tweet_trove.Tweets))
|
||||
}
|
||||
|
||||
// The retweet
|
||||
if len(tweet_trove.Retweets) != 1 {
|
||||
t.Errorf("Expected %d retweets, got %d", 1, len(tweet_trove.Retweets))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
func TestParseAPIV2UserFeed(t *testing.T) {
|
||||
data, err := ioutil.ReadFile("test_responses/feeds_api_v2/user_feed_apiv2.json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var feed scraper.APIV2Response
|
||||
err = json.Unmarshal(data, &feed)
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
|
||||
tweet_trove, err := feed.ToTweetTrove()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Check users
|
||||
user := tweet_trove.Users[44067298]
|
||||
if user.ID != 44067298 {
|
||||
t.Errorf("Expected ID %d, got %d", 44067298, user.ID)
|
||||
}
|
||||
if user.DisplayName != "Michael Malice" {
|
||||
t.Errorf("Expected display name %q, got %q", "Michael Malice", user.DisplayName)
|
||||
}
|
||||
|
||||
retweeted_user := tweet_trove.Users[1326229737551912960]
|
||||
if retweeted_user.ID != 1326229737551912960 {
|
||||
t.Errorf("Expected ID %d, got %d", 1326229737551912960, retweeted_user.ID)
|
||||
}
|
||||
if retweeted_user.Handle != "libsoftiktok" {
|
||||
t.Errorf("Expected handle %q, got %q", "libsoftiktok", retweeted_user.Handle)
|
||||
}
|
||||
|
||||
quote_tweeted_user := tweet_trove.Users[892155218292617217]
|
||||
if quote_tweeted_user.ID != 892155218292617217 {
|
||||
t.Errorf("Expected ID %d, got %d", 892155218292617217, quote_tweeted_user.ID)
|
||||
}
|
||||
|
||||
// Check retweets
|
||||
if len(tweet_trove.Retweets) != 2 {
|
||||
t.Errorf("Expected %d retweets but got %d", 2, len(tweet_trove.Retweets))
|
||||
}
|
||||
|
||||
// Test cursor-bottom
|
||||
bottom_cursor := feed.GetCursorBottom()
|
||||
if bottom_cursor != "HBaYgL2Fp/T7nCkAAA==" {
|
||||
t.Errorf("Expected cursor %q, got %q", "HBaYgL2Fp/T7nCkAAA==", bottom_cursor)
|
||||
}
|
||||
|
||||
|
||||
println(len(tweet_trove.Users))
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
scraper/test_responses/feeds_api_v2/user_feed_apiv2.json
Normal file
1
scraper/test_responses/feeds_api_v2/user_feed_apiv2.json
Normal file
File diff suppressed because one or more lines are too long
31
scraper/tweet_trove.go
Normal file
31
scraper/tweet_trove.go
Normal file
@ -0,0 +1,31 @@
|
||||
package scraper
|
||||
|
||||
type TweetTrove struct {
|
||||
Tweets map[TweetID]Tweet
|
||||
Users map[UserID]User
|
||||
Retweets map[TweetID]Retweet
|
||||
}
|
||||
|
||||
func NewTweetTrove() TweetTrove {
|
||||
ret := TweetTrove{}
|
||||
ret.Tweets = make(map[TweetID]Tweet)
|
||||
ret.Users = make(map[UserID]User)
|
||||
ret.Retweets = make(map[TweetID]Retweet)
|
||||
return ret
|
||||
}
|
||||
|
||||
/**
|
||||
* Make it compatible with previous silly interface if needed
|
||||
*/
|
||||
func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users []User) {
|
||||
for _, val := range trove.Tweets {
|
||||
tweets = append(tweets, val)
|
||||
}
|
||||
for _, val := range trove.Users {
|
||||
users = append(users, val)
|
||||
}
|
||||
for _, val := range trove.Retweets {
|
||||
retweets = append(retweets, val)
|
||||
}
|
||||
return
|
||||
} // TODO: refactor until this function isn't needed anymore
|
Loading…
x
Reference in New Issue
Block a user