Expand debug logging to make debugging scraping errors easier
- Print HTTP headers to the debug log on every request
- If parsing an APIV2 tweet panics, print the full JSON of that tweet entry to the debug log
This commit is contained in:
parent
cba6631a72
commit
b37977145a
@ -268,6 +268,9 @@ func (api *API) do_http(url string, cursor string, result interface{}) error {
|
|||||||
return fmt.Errorf("Error executing HTTP request:\n %w", err)
|
return fmt.Errorf("Error executing HTTP request:\n %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
for header := range req.Header {
|
||||||
|
log.Debug(fmt.Sprintf(" %s: %s\n", header, req.Header.Get(header)))
|
||||||
|
}
|
||||||
|
|
||||||
if resp.StatusCode != 200 && resp.StatusCode != 403 {
|
if resp.StatusCode != 200 && resp.StatusCode != 403 {
|
||||||
content, err := io.ReadAll(resp.Body)
|
content, err := io.ReadAll(resp.Body)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
@ -313,14 +314,22 @@ func (api_v2_tweet APIV2Tweet) ToTweetTrove() TweetTrove {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ItemContent is the type of the `itemContent` field inside an entry's `content` object.
// It was previously declared inline as an anonymous struct; it is a named type here.
type ItemContent struct {
|
||||||
|
EntryType string `json:"entryType"` // TODO: Assert this is always empty; looks like a typo
|
||||||
|
ItemType string `json:"itemType"`
|
||||||
|
TweetResults APIV2Result `json:"tweet_results"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wraps InnerAPIV2Entry to implement `json.Unmarshaler`. Does the normal unmarshal but also saves the original JSON.
|
||||||
type APIV2Entry struct {
|
type APIV2Entry struct {
|
||||||
|
InnerAPIV2Entry
|
||||||
|
OriginalJSON string
|
||||||
|
}
|
||||||
|
type InnerAPIV2Entry struct {
|
||||||
EntryID string `json:"entryId"`
|
EntryID string `json:"entryId"`
|
||||||
SortIndex int64 `json:"sortIndex,string"`
|
SortIndex int64 `json:"sortIndex,string"`
|
||||||
Content struct {
|
Content struct {
|
||||||
ItemContent struct {
|
ItemContent ItemContent `json:"itemContent"`
|
||||||
EntryType string `json:"entryType"`
|
|
||||||
TweetResults APIV2Result `json:"tweet_results"`
|
|
||||||
} `json:"itemContent"`
|
|
||||||
|
|
||||||
// Cursors
|
// Cursors
|
||||||
EntryType string `json:"entryType"`
|
EntryType string `json:"entryType"`
|
||||||
@ -329,6 +338,25 @@ type APIV2Entry struct {
|
|||||||
} `json:"content"`
|
} `json:"content"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UnmarshalJSON decodes `data` into the embedded InnerAPIV2Entry and, on success, keeps a copy
// of the raw input in OriginalJSON. If decoding fails, the error is returned wrapped with
// context and OriginalJSON is not assigned.
func (e *APIV2Entry) UnmarshalJSON(data []byte) error {
|
||||||
|
// Decode into the embedded struct, not into `e` itself: unmarshalling into `e` would
// dispatch back to this method.
err := json.Unmarshal(data, &e.InnerAPIV2Entry)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Error parsing json APIV2Entry:\n %w", err)
|
||||||
|
}
|
||||||
|
// Saved so the raw entry can be printed later (see ToTweetTrove's panic handler).
e.OriginalJSON = string(data)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToTweetTrove converts this entry's `Content.ItemContent.TweetResults` into a TweetTrove,
// passing `ignore_null_entries` through unchanged. If the conversion panics, the entry's
// OriginalJSON is logged with log.Warn and the panic is then re-raised with the same value.
func (e APIV2Entry) ToTweetTrove(ignore_null_entries bool) TweetTrove {
|
||||||
|
defer func() {
|
||||||
|
if obj := recover(); obj != nil {
|
||||||
|
log.Warn(fmt.Sprintf("Panic while decoding entry: %s\n", e.OriginalJSON))
|
||||||
|
// Re-panic: this handler only adds diagnostics, it does not swallow the failure.
panic(obj)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return e.Content.ItemContent.TweetResults.ToTweetTrove(ignore_null_entries)
|
||||||
|
}
|
||||||
|
|
||||||
type APIV2Instruction struct {
|
type APIV2Instruction struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Entries []APIV2Entry `json:"entries"`
|
Entries []APIV2Entry `json:"entries"`
|
||||||
@ -390,13 +418,11 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
result := entry.Content.ItemContent.TweetResults
|
main_trove := entry.ToTweetTrove(true)
|
||||||
|
|
||||||
main_trove := result.ToTweetTrove(true)
|
|
||||||
ret.MergeWith(main_trove)
|
ret.MergeWith(main_trove)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret, nil
|
return ret, nil // TODO: This doesn't need to return an error, it's always nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
|
||||||
|
@ -618,8 +618,15 @@ func TestAPIV2GetMainInstructionFromFeed(t *testing.T) {
|
|||||||
|
|
||||||
assert.Equal(len(feed.GetMainInstruction().Entries), 41)
|
assert.Equal(len(feed.GetMainInstruction().Entries), 41)
|
||||||
|
|
||||||
|
// Check that they have OriginalJSON filled out
|
||||||
|
for _, entry := range feed.GetMainInstruction().Entries {
|
||||||
|
assert.True(len(entry.OriginalJSON) > 0)
|
||||||
|
}
|
||||||
|
|
||||||
// Test that this is a writable version
|
// Test that this is a writable version
|
||||||
feed.GetMainInstruction().Entries = append(feed.GetMainInstruction().Entries, APIV2Entry{EntryID: "asdf"})
|
feed.GetMainInstruction().Entries = append(feed.GetMainInstruction().Entries, APIV2Entry{
|
||||||
|
InnerAPIV2Entry: InnerAPIV2Entry{EntryID: "asdf"},
|
||||||
|
})
|
||||||
assert.Equal(len(feed.GetMainInstruction().Entries), 42)
|
assert.Equal(len(feed.GetMainInstruction().Entries), 42)
|
||||||
assert.Equal(feed.GetMainInstruction().Entries[41].EntryID, "asdf")
|
assert.Equal(feed.GetMainInstruction().Entries[41].EntryID, "asdf")
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user