Expand debug logging to make debugging scraping errors easier
- Print HTTP headers to the debug log on every request
- If parsing an APIV2 tweet panics, print the full JSON of that tweet entry to the debug log
This commit is contained in:
parent cba6631a72
commit b37977145a
@@ -268,6 +268,9 @@ func (api *API) do_http(url string, cursor string, result interface{}) error {
 		return fmt.Errorf("Error executing HTTP request:\n %w", err)
 	}
 	defer resp.Body.Close()
+	for header := range req.Header {
+		log.Debug(fmt.Sprintf("    %s: %s\n", header, req.Header.Get(header)))
+	}
 
 	if resp.StatusCode != 200 && resp.StatusCode != 403 {
 		content, err := io.ReadAll(resp.Body)
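Side note on the header logging above: `http.Header` is a `map[string][]string`, and `Header.Get` returns only the first value stored under a key, which is fine for the single-valued headers this scraper sends. If you ever need every value of a multi-valued header, a variant could look like the sketch below (standalone, using the standard library `log` package as a stand-in for the project's logger; `dumpHeaders` is a hypothetical name, not part of the commit):

package main

import (
	"log"
	"net/http"
	"strings"
)

// dumpHeaders logs every value of every request header, not just the first one.
func dumpHeaders(req *http.Request) {
	for name, values := range req.Header {
		// strings.Join keeps multi-valued headers on a single log line
		log.Printf("    %s: %s", name, strings.Join(values, ", "))
	}
}

func main() {
	req, err := http.NewRequest("GET", "https://example.com", nil)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("X-Csrf-Token", "<csrf>")
	dumpHeaders(req)
}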
@@ -1,6 +1,7 @@
 package scraper
 
 import (
+	"encoding/json"
 	"fmt"
 	"net/url"
 	"strings"
@@ -313,14 +314,22 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
 	return ret
 }
 
+type ItemContent struct {
+	EntryType    string      `json:"entryType"` // TODO: Assert this is always empty; looks like a typo
+	ItemType     string      `json:"itemType"`
+	TweetResults APIV2Result `json:"tweet_results"`
+}
+
+// Wraps InnerAPIV2Entry to implement `json.Unmarshal`.  Does the normal unmarshal but also saves the original JSON.
 type APIV2Entry struct {
+	InnerAPIV2Entry
+	OriginalJSON string
+}
+type InnerAPIV2Entry struct {
 	EntryID   string `json:"entryId"`
 	SortIndex int64  `json:"sortIndex,string"`
 	Content   struct {
-		ItemContent struct {
-			EntryType    string      `json:"entryType"`
-			TweetResults APIV2Result `json:"tweet_results"`
-		} `json:"itemContent"`
+		ItemContent ItemContent `json:"itemContent"`
 
 		// Cursors
 		EntryType string `json:"entryType"`
@@ -329,6 +338,25 @@ type APIV2Entry struct {
 	} `json:"content"`
 }
 
+func (e *APIV2Entry) UnmarshalJSON(data []byte) error {
+	err := json.Unmarshal(data, &e.InnerAPIV2Entry)
+	if err != nil {
+		return fmt.Errorf("Error parsing json APIV2Entry:\n %w", err)
+	}
+	e.OriginalJSON = string(data)
+	return nil
+}
+
+func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
+	defer func() {
+		if obj := recover(); obj != nil {
+			log.Warn(fmt.Sprintf("Panic while decoding entry: %s\n", e.OriginalJSON))
+			panic(obj)
+		}
+	}()
+	return e.Content.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries)
+}
+
 type APIV2Instruction struct {
 	Type    string       `json:"type"`
 	Entries []APIV2Entry `json:"entries"`
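Why the APIV2Entry / InnerAPIV2Entry split in the hunk above: if UnmarshalJSON called json.Unmarshal on the outer type itself, the custom method would invoke itself and recurse forever. Decoding into an embedded inner type that has no UnmarshalJSON breaks that cycle, and field promotion means callers can still write entry.EntryID. A minimal, self-contained sketch of the same pattern (hypothetical Record/innerRecord names, not the project's types):

package main

import (
	"encoding/json"
	"fmt"
)

type innerRecord struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}

// Record keeps the decoded fields plus the raw JSON it was parsed from.
type Record struct {
	innerRecord
	OriginalJSON string
}

func (r *Record) UnmarshalJSON(data []byte) error {
	// Decode into the embedded struct; it has no UnmarshalJSON, so this doesn't recurse.
	if err := json.Unmarshal(data, &r.innerRecord); err != nil {
		return err
	}
	r.OriginalJSON = string(data)
	return nil
}

func main() {
	var r Record
	if err := json.Unmarshal([]byte(`{"id": "123", "name": "example"}`), &r); err != nil {
		panic(err)
	}
	fmt.Println(r.ID, r.Name)   // promoted fields from innerRecord
	fmt.Println(r.OriginalJSON) // raw JSON preserved for debug logging
}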
@@ -390,13 +418,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
 			continue
 		}
 
-		result := entry.Content.ItemContent.TweetResults
-
-		main_trove := result.ToTweetTrove(true)
+		main_trove := entry.ToTweetTrove(true)
 		ret.MergeWith(main_trove)
 	}
 
-	return ret, nil
+	return ret, nil // TODO: This doesn't need to return an error, it's always nil
 }
 
 func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
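Routing the conversion through entry.ToTweetTrove means every entry now passes through the deferred recover added above, so a panic anywhere in the nested parsing gets the entry's full OriginalJSON logged before the panic is re-raised and behaviour otherwise stays the same. A minimal sketch of that recover-log-repanic idiom in isolation (hypothetical convert/convertWithContext names, standard library log):

package main

import (
	"fmt"
	"log"
)

// convert stands in for a parsing step that may panic on unexpected input.
func convert(raw string) int {
	if raw == "" {
		panic("unexpected empty payload")
	}
	return len(raw)
}

// convertWithContext logs the offending input if convert panics, then re-panics
// so the caller still sees the original failure.
func convertWithContext(raw string) int {
	defer func() {
		if obj := recover(); obj != nil {
			log.Printf("Panic while converting entry: %s", raw)
			panic(obj) // re-raise with the original panic value
		}
	}()
	return convert(raw)
}

func main() {
	fmt.Println(convertWithContext("hello")) // prints 5
	// convertWithContext("") would log the raw input and then panic
}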
@@ -618,8 +618,15 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
 
 	assert.Equal(len(feed.GetMainInstruction().Entries), 41)
 
+	// Check that they have OriginalJSON filled out
+	for _, entry := range feed.GetMainInstruction().Entries {
+		assert.True(len(entry.OriginalJSON) > 0)
+	}
+
 	// Test that this is a writable version
-	feed.GetMainInstruction().Entries = append(feed.GetMainInstruction().Entries, APIV2Entry{EntryID: "asdf"})
+	feed.GetMainInstruction().Entries = append(feed.GetMainInstruction().Entries, APIV2Entry{
+		InnerAPIV2Entry: InnerAPIV2Entry{EntryID: "asdf"},
+	})
 	assert.Equal(len(feed.GetMainInstruction().Entries), 42)
 	assert.Equal(feed.GetMainInstruction().Entries[41].EntryID, "asdf")
 }