Incorporate the new parsing structure into the actual program

This commit is contained in:
Alessio 2022-02-01 16:04:52 -08:00
parent b2199e9b59
commit 1bc6aec3b5
6 changed files with 22 additions and 448 deletions

View File

@ -114,6 +114,23 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
return ret return ret
} }
// APIV2Entry is one element of the "entries" array of a timeline instruction in an
// APIV2Response.  The same shape is used for two kinds of entry: tweet entries, whose
// payload is under Content.ItemContent, and cursor entries, whose payload is in the
// fields placed directly on Content.
type APIV2Entry struct {
// Decoded from "entryId".  APIV2Response.ToTweetTrove only processes entries whose
// EntryID begins with "tweet-"; all other entries are skipped there.
EntryID string `json:"entryId"`
// The `,string` option means the JSON value is a quoted string that is parsed into an int64.
SortIndex int64 `json:"sortIndex,string"`
Content struct {
// Tweet payload
ItemContent struct {
EntryType string `json:"entryType"`
// The tweet result; ToTweetTrove() is called on it to produce this entry's TweetTrove.
TweetResults APIV2Result `json:"tweet_results"`
} `json:"itemContent"`
// Cursors
// NOTE(review): presumably these three fields are only populated on cursor entries -- confirm.
// GetCursorBottom returns the Value of the entry it selects.
EntryType string `json:"entryType"`
Value string `json:"value"`
CursorType string `json:"cursorType"`
} `json:"content"`
}
type APIV2Response struct { type APIV2Response struct {
Data struct { Data struct {
User struct { User struct {
@ -122,108 +139,7 @@ type APIV2Response struct {
Timeline struct { Timeline struct {
Instructions []struct { Instructions []struct {
Type string `json:"type"` Type string `json:"type"`
Entries []struct { Entries []APIV2Entry`json:"entries"`
EntryID string `json:"entryId"`
SortIndex int64 `json:"sortIndex,string"`
Content struct {
ItemContent struct {
EntryType string `json:"entryType"`
TweetResults struct {
Result struct {
Legacy struct {
APITweet
RetweetedStatusResult struct {
Result struct {
ID int `json:"rest_id,string"`
Legacy APITweet `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
QuotedStatusResult struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APITweet `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
} `json:"result"`
} `json:"quoted_status_result"`
} `json:"result"`
} `json:"retweeted_status_result"`
} `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
QuotedStatusResult struct { // Same as "Result"
Result struct {
ID int64 `json:"rest_id,string"`
Legacy struct {
APITweet
RetweetedStatusResult struct {
Result struct {
ID int `json:"rest_id,string"`
Legacy APITweet `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
QuotedStatusResult struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APITweet `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
} `json:"result"`
} `json:"quoted_status_result"`
} `json:"result"`
} `json:"retweeted_status_result"`
} `json:"legacy"`
Core struct {
UserResults struct {
Result struct {
ID int64 `json:"rest_id,string"`
Legacy APIUser `json:"legacy"`
} `json:"result"`
} `json:"user_results"`
} `json:"core"`
} `json:"result"`
} `json:"quoted_status_result"`
} `json:"result"`
} `json:"tweet_results"`
} `json:"itemContent"`
// Cursors
EntryType string `json:"entryType"`
Value string `json:"value"`
CursorType string `json:"cursorType"`
} `json:"content"`
} `json:"entries"`
} `json:"instructions"` } `json:"instructions"`
} `json:"timeline"` } `json:"timeline"`
} `json:"timeline"` } `json:"timeline"`
@ -242,7 +158,6 @@ func (api_response APIV2Response) GetCursorBottom() string {
return last_entry.Content.Value return last_entry.Content.Value
} }
/** /**
* Parse the collected API response and turn it into a TweetTrove * Parse the collected API response and turn it into a TweetTrove
*/ */
@ -250,102 +165,15 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove() ret := NewTweetTrove()
for _, entry := range api_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries { // TODO: the second Instruction is the pinned tweet for _, entry := range api_response.Data.User.Result.Timeline.Timeline.Instructions[0].Entries { // TODO: the second Instruction is the pinned tweet
if !strings.HasPrefix(entry.EntryID, "tweet-") { if !strings.HasPrefix(entry.EntryID, "tweet-") {
// println(entry.EntryID)
continue continue
} }
result := entry.Content.ItemContent.TweetResults.Result result := entry.Content.ItemContent.TweetResults
apiv2_tweet := result.Legacy
apiv2_user_result := result.Core.UserResults.Result
apiv2_retweeted_tweet_result := apiv2_tweet.RetweetedStatusResult.Result
apiv2_retweeted_tweet_user := apiv2_retweeted_tweet_result.Core.UserResults.Result
apiv2_retweeted_quoted_result := apiv2_retweeted_tweet_result.QuotedStatusResult.Result
apiv2_retweeted_quoted_user := apiv2_retweeted_quoted_result.Core.UserResults.Result
apiv2_quoted_tweet_result := result.QuotedStatusResult.Result
apiv2_quoted_user_result := apiv2_quoted_tweet_result.Core.UserResults.Result
// Handle case of retweet (main tweet doesn't get parsed other than retweeted_at) main_trove := result.ToTweetTrove()
if apiv2_retweeted_tweet_result.ID != 0 { ret.MergeWith(main_trove)
orig_tweet, err := ParseSingleTweet(apiv2_retweeted_tweet_result.Legacy)
if err != nil {
return TweetTrove{}, err
}
ret.Tweets[orig_tweet.ID] = orig_tweet
orig_user, err := ParseSingleUser(apiv2_retweeted_tweet_user.Legacy)
if err != nil {
return TweetTrove{}, err
}
orig_user.ID = UserID(apiv2_retweeted_tweet_user.ID)
ret.Users[orig_user.ID] = orig_user
retweeting_user, err := ParseSingleUser(apiv2_user_result.Legacy)
if err != nil {
return TweetTrove{}, err
}
retweeting_user.ID = UserID(apiv2_user_result.ID)
ret.Users[retweeting_user.ID] = retweeting_user
retweet := Retweet{}
retweet.RetweetID = TweetID(apiv2_tweet.ID)
retweet.TweetID = TweetID(orig_tweet.ID)
retweet.RetweetedByID = retweeting_user.ID
retweet.RetweetedAt, err = time.Parse(time.RubyDate, apiv2_tweet.CreatedAt)
if err != nil {
fmt.Printf("%v\n", apiv2_tweet)
panic(err)
}
ret.Retweets[retweet.RetweetID] = retweet
// Handle quoted tweet
if apiv2_retweeted_quoted_result.ID != 0 {
quoted_tweet, err := ParseSingleTweet(apiv2_retweeted_quoted_result.Legacy)
if err != nil {
return TweetTrove{}, err
}
ret.Tweets[quoted_tweet.ID] = quoted_tweet
quoted_user, err := ParseSingleUser(apiv2_retweeted_quoted_user.Legacy)
if err != nil {
return TweetTrove{}, err
}
quoted_user.ID = UserID(apiv2_retweeted_quoted_user.ID)
ret.Users[quoted_user.ID] = quoted_user
}
continue
}
// The main tweet
tweet, err := ParseSingleTweet(apiv2_tweet.APITweet)
if err != nil {
return TweetTrove{}, err
}
ret.Tweets[tweet.ID] = tweet
user, err := ParseSingleUser(apiv2_user_result.Legacy)
if err != nil {
return TweetTrove{}, err
}
user.ID = UserID(apiv2_user_result.ID)
ret.Users[user.ID] = user
// Handle quoted tweet
if apiv2_quoted_tweet_result.ID != 0 {
quoted_tweet, err := ParseSingleTweet(apiv2_quoted_tweet_result.Legacy.APITweet)
if err != nil {
return TweetTrove{}, err
}
ret.Tweets[quoted_tweet.ID] = quoted_tweet
quoted_user, err := ParseSingleUser(apiv2_quoted_user_result.Legacy)
if err != nil {
return TweetTrove{}, err
}
quoted_user.ID = UserID(apiv2_quoted_user_result.ID)
ret.Users[quoted_user.ID] = quoted_user
}
} }
return ret, nil return ret, nil
} }

View File

@ -47,7 +47,6 @@ func TestAPIV2ParseUser(t *testing.T) {
assert.Equal(user.PinnedTweetID, TweetID(1477347403023982596)) assert.Equal(user.PinnedTweetID, TweetID(1477347403023982596))
} }
/** /**
* Parse a plain text tweet * Parse a plain text tweet
*/ */
@ -96,7 +95,6 @@ func TestAPIV2ParseTweet(t *testing.T) {
assert.Equal(0, len(trove.Retweets)) assert.Equal(0, len(trove.Retweets))
} }
/** /**
* Parse a tweet with a quoted tweet * Parse a tweet with a quoted tweet
*/ */
@ -150,7 +148,6 @@ func TestAPIV2ParseTweetWithQuotedTweet(t *testing.T) {
assert.Equal(0, len(trove.Retweets)) assert.Equal(0, len(trove.Retweets))
} }
/** /**
* Parse a retweet * Parse a retweet
*/ */
@ -211,7 +208,6 @@ func TestAPIV2ParseRetweet(t *testing.T) {
assert.Equal(UserID(44067298), retweet.RetweetedByID) assert.Equal(UserID(44067298), retweet.RetweetedByID)
} }
/** /**
* Parse a retweeted quote tweet * Parse a retweeted quote tweet
*/ */
@ -272,252 +268,6 @@ func TestAPIV2ParseRetweetedQuoteTweet(t *testing.T) {
} }
// TestAPIV2FeedSimpleTweet checks a feed containing a plain old tweet: it should parse
// into exactly one user, one tweet and no retweets.
func TestAPIV2FeedSimpleTweet(t *testing.T) {
	// Untyped constants, so they compare directly against the ID fields of the parsed items.
	const (
		expected_user_id  = 44067298
		expected_tweet_id = 1485708879174508550
	)

	data, err := ioutil.ReadFile("test_responses/api_v2/feed_simple_tweet.json")
	if err != nil {
		// Fail this test only; a panic would abort every other test in the binary.
		t.Fatalf("reading fixture: %v", err)
	}
	var feed APIV2Response
	if err = json.Unmarshal(data, &feed); err != nil {
		// Nothing below is meaningful without a decoded feed, so stop here.
		t.Fatalf("unmarshalling fixture: %v", err)
	}
	tweet_trove, err := feed.ToTweetTrove()
	if err != nil {
		t.Fatalf("converting feed to a TweetTrove: %v", err)
	}

	// The author
	if len(tweet_trove.Users) != 1 {
		t.Errorf("Expected 1 user, got %d", len(tweet_trove.Users))
	}
	user := tweet_trove.Users[expected_user_id]
	if user.ID != expected_user_id {
		t.Errorf("Expected ID %d, got %d", expected_user_id, user.ID)
	}
	if user.DisplayName != "Michael Malice" {
		t.Errorf("Expected display name %q, got %q", "Michael Malice", user.DisplayName)
	}

	// The tweet
	if len(tweet_trove.Tweets) != 1 {
		t.Errorf("Expected %d tweets, got %d", 1, len(tweet_trove.Tweets))
	}
	tweet := tweet_trove.Tweets[expected_tweet_id]
	if tweet.ID != expected_tweet_id {
		// int64(...) keeps the large constant from defaulting to int, which overflows on 32-bit targets.
		t.Errorf("Expected ID %d, got %d", int64(expected_tweet_id), tweet.ID)
	}
	if tweet.UserID != UserID(expected_user_id) {
		t.Errorf("Expected user ID %d, got %d", expected_user_id, tweet.UserID)
	}
	expected_text := "If Boris Johnson is driven out of office, it wouldn't mark the first time the Tories had four PMs in a row\nThey had previously governed the UK for 13 years with 4 PMs, from 1951-1964"
	if tweet.Text != expected_text {
		t.Errorf("Expected text: %q, got: %q", expected_text, tweet.Text)
	}

	if len(tweet_trove.Retweets) != 0 {
		t.Errorf("Shouldn't be any retweets")
	}
}
// TestAPIV2FeedRetweet checks a feed containing a retweet: it should yield the retweeting
// and retweeted users, only the retweeted tweet, and one retweet record.
func TestAPIV2FeedRetweet(t *testing.T) {
	// Untyped constants, so they compare directly against the ID fields of the parsed items.
	// When printed they are wrapped in int64(...): a bare untyped constant passed to a
	// variadic argument defaults to int, which these values overflow on 32-bit targets.
	const (
		retweeting_user_id = 44067298
		retweeted_user_id  = 1326229737551912960
		retweeted_tweet_id = 1485694028620316673
		retweet_id         = 1485699748514476037
		retweeted_at       = 1643053397
	)

	data, err := ioutil.ReadFile("test_responses/api_v2/feed_simple_retweet.json")
	if err != nil {
		// Fail this test only; a panic would abort every other test in the binary.
		t.Fatalf("reading fixture: %v", err)
	}
	var feed APIV2Response
	if err = json.Unmarshal(data, &feed); err != nil {
		// Nothing below is meaningful without a decoded feed, so stop here.
		t.Fatalf("unmarshalling fixture: %v", err)
	}
	tweet_trove, err := feed.ToTweetTrove()
	if err != nil {
		t.Fatalf("converting feed to a TweetTrove: %v", err)
	}

	// Should fetch both the retweeting and retweeted users
	if len(tweet_trove.Users) != 2 {
		t.Errorf("Expected %d users, got %d", 2, len(tweet_trove.Users))
	}
	user := tweet_trove.Users[retweeting_user_id]
	if user.ID != retweeting_user_id {
		t.Errorf("Expected ID %d, got %d", retweeting_user_id, user.ID)
	}
	if user.DisplayName != "Michael Malice" {
		t.Errorf("Expected display name %q, got %q", "Michael Malice", user.DisplayName)
	}
	retweeted_user := tweet_trove.Users[retweeted_user_id]
	if retweeted_user.ID != retweeted_user_id {
		t.Errorf("Expected ID %d, got %d", int64(retweeted_user_id), retweeted_user.ID)
	}
	if retweeted_user.Handle != "libsoftiktok" {
		t.Errorf("Expected handle %q, got %q", "libsoftiktok", retweeted_user.Handle)
	}

	// Should only be 1 tweet, the retweeted one
	if len(tweet_trove.Tweets) != 1 {
		t.Errorf("Expected %d tweets, got %d", 1, len(tweet_trove.Tweets))
	}
	tweet, ok := tweet_trove.Tweets[retweeted_tweet_id]
	if !ok {
		t.Fatalf("Didn't get the tweet")
	}
	if tweet.ID != retweeted_tweet_id {
		t.Errorf("Expected ID %d, got %d", int64(retweeted_tweet_id), tweet.ID)
	}
	if tweet.UserID != UserID(retweeted_user_id) {
		t.Errorf("Expected user ID %d, got %d", int64(retweeted_user_id), tweet.UserID)
	}
	expected_text := "More mask madness, this time in an elevator. The mask police are really nuts https://t.co/3BpvLjdJwD"
	if tweet.Text != expected_text {
		t.Errorf("Expected text: %q, got: %q", expected_text, tweet.Text)
	}

	// Should be 1 retweet
	if len(tweet_trove.Retweets) != 1 {
		t.Errorf("Expected %d retweets, got %d", 1, len(tweet_trove.Retweets))
	}
	retweet := tweet_trove.Retweets[retweet_id]
	if retweet.RetweetID != retweet_id {
		t.Errorf("Expected RetweetID %d, got %d", int64(retweet_id), retweet.RetweetID)
	}
	if retweet.TweetID != retweeted_tweet_id {
		t.Errorf("Expected TweetID %d, got %d", int64(retweeted_tweet_id), retweet.TweetID)
	}
	if retweet.RetweetedAt.Unix() != retweeted_at {
		t.Errorf("Expected retweeted_at %d, got %d", retweeted_at, retweet.RetweetedAt.Unix())
	}
	if retweet.RetweetedByID != UserID(retweeting_user_id) {
		t.Errorf("Expected retweeted_by %d, got %d", retweeting_user_id, retweet.RetweetedByID)
	}
}
// TestAPIV2FeedQuoteTweet checks a feed containing a quote-tweet: it should yield the
// quoting and quoted users, both tweets (linked by QuotedTweetID), and no retweets.
func TestAPIV2FeedQuoteTweet(t *testing.T) {
	// Untyped constants, so they compare directly against the ID fields of the parsed items.
	// When printed they are wrapped in int64(...): a bare untyped constant passed to a
	// variadic argument defaults to int, which the large values overflow on 32-bit targets.
	const (
		quoting_user_id  = 44067298
		quoted_user_id   = 892155218292617217
		quoting_tweet_id = 1485690410899021826
		quoted_tweet_id  = 1485690069079846915
	)

	data, err := ioutil.ReadFile("test_responses/api_v2/feed_quote_tweet.json")
	if err != nil {
		// Fail this test only; a panic would abort every other test in the binary.
		t.Fatalf("reading fixture: %v", err)
	}
	var feed APIV2Response
	if err = json.Unmarshal(data, &feed); err != nil {
		// Nothing below is meaningful without a decoded feed, so stop here.
		t.Fatalf("unmarshalling fixture: %v", err)
	}
	tweet_trove, err := feed.ToTweetTrove()
	if err != nil {
		t.Fatalf("converting feed to a TweetTrove: %v", err)
	}

	// Should be 2 users: quoter and quoted
	if len(tweet_trove.Users) != 2 {
		t.Errorf("Expected %d users, got %d", 2, len(tweet_trove.Users))
	}
	quoting_user := tweet_trove.Users[quoting_user_id]
	if quoting_user.ID != quoting_user_id {
		t.Errorf("Expected quoting user ID %d, got %d", quoting_user_id, quoting_user.ID)
	}
	quoted_user := tweet_trove.Users[quoted_user_id]
	if quoted_user.ID != quoted_user_id {
		t.Errorf("Expected quoted user ID %d, got %d", int64(quoted_user_id), quoted_user.ID)
	}
	expected_quoted_bio := "Creator of Little Homes and Mooncars"
	if quoted_user.Bio != expected_quoted_bio {
		t.Errorf("Expected bio %q, got %q", expected_quoted_bio, quoted_user.Bio)
	}

	// Should be 2 tweets: quote-tweet and quoted-tweet
	if len(tweet_trove.Tweets) != 2 {
		t.Errorf("Expected %d tweets, got %d", 2, len(tweet_trove.Tweets))
	}
	quoted_tweet := tweet_trove.Tweets[quoted_tweet_id]
	if quoted_tweet.ID != quoted_tweet_id {
		t.Errorf("Expected quoted ID %d, got %d", int64(quoted_tweet_id), quoted_tweet.ID)
	}
	expected_quoted_text := "The Left hates the Right so much that they won't let them leave the Union. I don't get it."
	if quoted_tweet.Text != expected_quoted_text {
		t.Errorf("Expected text %q, got %q", expected_quoted_text, quoted_tweet.Text)
	}
	quote_tweet := tweet_trove.Tweets[quoting_tweet_id]
	if quote_tweet.ID != quoting_tweet_id {
		t.Errorf("Expected quoting ID %d, got %d", int64(quoting_tweet_id), quote_tweet.ID)
	}
	if quote_tweet.QuotedTweetID != quoted_tweet_id {
		t.Errorf("Expected to be quoting tweet ID %d, got %d", int64(quoted_tweet_id), quote_tweet.QuotedTweetID)
	}

	// No retweets
	if len(tweet_trove.Retweets) != 0 {
		t.Errorf("Shouldn't be any retweets")
	}
}
// TestAPIV2FeedRetweetedQuoteTweet checks a feed containing a retweet of a quote-tweet:
// it should yield three users (retweeter, quoter, quoted), the quoting and quoted tweets,
// and one retweet record.
func TestAPIV2FeedRetweetedQuoteTweet(t *testing.T) {
	// Untyped constants, so they compare directly against the ID fields of the parsed items.
	// When printed they are wrapped in int64(...): a bare untyped constant passed to a
	// variadic argument defaults to int, which the large value overflows on 32-bit targets.
	const (
		retweeting_user_id = 599817378
		retweeted_user_id  = 1434720042193760256
		quoted_user_id     = 14347972
	)

	data, err := ioutil.ReadFile("test_responses/api_v2/feed_retweeted_quote_tweet.json")
	if err != nil {
		// Fail this test only; a panic would abort every other test in the binary.
		t.Fatalf("reading fixture: %v", err)
	}
	var feed APIV2Response
	if err = json.Unmarshal(data, &feed); err != nil {
		// Nothing below is meaningful without a decoded feed, so stop here.
		t.Fatalf("unmarshalling fixture: %v", err)
	}
	tweet_trove, err := feed.ToTweetTrove()
	if err != nil {
		t.Fatalf("converting feed to a TweetTrove: %v", err)
	}

	// 3 Users: quoted, quoter, and retweeter
	if len(tweet_trove.Users) != 3 {
		t.Errorf("Expected %d users, got %d", 3, len(tweet_trove.Users))
	}
	retweeting_user := tweet_trove.Users[retweeting_user_id]
	if retweeting_user.ID != retweeting_user_id {
		t.Errorf("Expected retweeting user ID %d, got %d", retweeting_user_id, retweeting_user.ID)
	}
	expected_website := "https://www.youtube.com/highlyrespected"
	if retweeting_user.Website != expected_website {
		t.Errorf("Expected RTing user website %q, got %q", expected_website, retweeting_user.Website)
	}
	retweeted_user := tweet_trove.Users[retweeted_user_id]
	if retweeted_user.ID != retweeted_user_id {
		t.Errorf("Expected retweeted user ID %d, got %d", int64(retweeted_user_id), retweeted_user.ID)
	}
	if retweeted_user.FollowersCount != 17843 {
		t.Errorf("Expected %d followers, got %d", 17843, retweeted_user.FollowersCount)
	}
	quoted_user := tweet_trove.Users[quoted_user_id]
	if quoted_user.ID != quoted_user_id {
		t.Errorf("Expected quoted user ID %d, got %d", quoted_user_id, quoted_user.ID)
	}
	if !quoted_user.IsVerified {
		t.Errorf("Expected quoted user to be verified")
	}

	// Quoted tweet and quoting tweet
	if len(tweet_trove.Tweets) != 2 {
		t.Errorf("Expected %d tweets, got %d", 2, len(tweet_trove.Tweets))
	}

	// The retweet
	if len(tweet_trove.Retweets) != 1 {
		t.Errorf("Expected %d retweets, got %d", 1, len(tweet_trove.Retweets))
	}
}
func TestParseAPIV2UserFeed(t *testing.T) { func TestParseAPIV2UserFeed(t *testing.T) {
data, err := ioutil.ReadFile("test_responses/api_v2/user_feed_apiv2.json") data, err := ioutil.ReadFile("test_responses/api_v2/user_feed_apiv2.json")
if err != nil { if err != nil {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long