From b8b3b56c8edb38815180dac542b0dfc2f4b3745f Mon Sep 17 00:00:00 2001 From: Alessio Date: Fri, 14 Oct 2022 22:44:02 -0400 Subject: [PATCH] Add woodpecker config, update tests due to an account getting banned --- .woodpecker.yml | 34 ++++++++++++++++++++++++++++++++++ cmd/tests.sh | 20 ++++++++++---------- doc/TODO.txt | 1 + scraper/api_types_v2.go | 4 ++-- scraper/guest_token_test.go | 2 +- scraper/tweet_trove.go | 1 - 6 files changed, 48 insertions(+), 14 deletions(-) create mode 100644 .woodpecker.yml diff --git a/.woodpecker.yml b/.woodpecker.yml new file mode 100644 index 0000000..f1942dc --- /dev/null +++ b/.woodpecker.yml @@ -0,0 +1,34 @@ +# All pipeline steps run in their own container, but the working directory (with code) is in a shared +# volume, which is mounted in all the containers. So modifying the filesystem (within the working dir) +# is persistent between build steps. + +pipeline: + lint: + image: offline-twitter/go + commands: + - golangci-lint run + + test: + image: offline-twitter/go + commands: + - mkdir persistence/test_profiles + - go test -bench=. -cover ./... + + integration_test: + image: offline-twitter/go + commands: + - cd cmd + - ./tests.sh + + + + # dpkg_build_and_upload: + # when: + # branch: release-* + # image: offline-twitter/deploy + # commands: + # - export version=$(echo $CI_COMMIT_BRANCH | grep -Poh "(?<=^release-)\d+\.\d+\.\d+") + # - cd build + # - ./build_dpkg.sh $version + # - scp offline-twitter_$${version}_all.deb aptrepo@apt.playfulpachyderm.com:/apt-repo/test-repo + # - ssh aptrepo@apt.playfulpachyderm.com "cd ~/test-repo && ./update.sh" diff --git a/cmd/tests.sh b/cmd/tests.sh index 6f23607..2cdcbd8 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -58,27 +58,27 @@ test $(sqlite3 twitter.db "select count(*) from images") = "4" # Fetch a tweet with a video -tw fetch_user DiamondChariots +tw fetch_user SpaceX test $(sqlite3 twitter.db "select handle from users" | wc -l) = "2" -tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340 +tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432 test $(sqlite3 twitter.db "select count(*) from tweets") = "2" test $(sqlite3 twitter.db "select count(*) from videos") = "1" # Download the video -test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "1" -test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0" -test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0" +test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 0") = "1" +test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 1") = "0" +test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1581025285524242432") = "0" test $(find videos | wc -l) = "1" test $(find video_thumbnails | wc -l) = "1" -tw download_tweet_content 1418971605674467340 -test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0" -test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1" -test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1" +tw download_tweet_content 1581025285524242432 +test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 0") = "0" +test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 1") = "1" +test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1581025285524242432") = "1" test $(find videos | wc -l) = "2" test $(find video_thumbnails | wc -l) = "2" # Try to double-download it -tw fetch_tweet_only https://twitter.com/DiamondChariots/status/1418971605674467340 +tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432 test $(sqlite3 twitter.db "select count(*) from tweets") = "2" test $(sqlite3 twitter.db "select count(*) from videos") = "1" diff --git a/doc/TODO.txt b/doc/TODO.txt index 6af5801..74e3851 100644 --- a/doc/TODO.txt +++ b/doc/TODO.txt @@ -33,6 +33,7 @@ TODO: Problem tweets - "account no longer exists" tombstone not being collected => https://twitter.com/michaelmalice/status/1461031030278742020 - tweet with warning label not getting scraped right: https://twitter.com/michaelmalice/status/1493324611999748098 - fails to produce any result for the first tweet in the thread => https://twitter.com/CovfefeAnon/status/1498877082838962181 +- twitter fetch_tweet 1517683230421528576 => panic: ScreenName is empty! TODO: videos-view-count - videos don't parse properly in APIv2 diff --git a/scraper/api_types_v2.go b/scraper/api_types_v2.go index 6f87ded..7c081d4 100644 --- a/scraper/api_types_v2.go +++ b/scraper/api_types_v2.go @@ -391,9 +391,9 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) { func get_graphql_user_timeline_url(user_id UserID, cursor string) string { if cursor != "" { - return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on + return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on } - return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" // nolint:lll // It's a URL, come on + return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on } /** diff --git a/scraper/guest_token_test.go b/scraper/guest_token_test.go index b8b1335..8b9d762 100644 --- a/scraper/guest_token_test.go +++ b/scraper/guest_token_test.go @@ -24,6 +24,6 @@ func TestGetGuestToken(t *testing.T) { // other than the first use the cache. func BenchmarkGetGuestToken(b *testing.B) { for i := 0; i < b.N; i++ { - GetGuestToken() // nolint:errcheck // Don't care about errors, just want to time it + GetGuestToken() //nolint:errcheck // Don't care about errors, just want to time it } } diff --git a/scraper/tweet_trove.go b/scraper/tweet_trove.go index 741e79e..3b263ab 100644 --- a/scraper/tweet_trove.go +++ b/scraper/tweet_trove.go @@ -103,7 +103,6 @@ func (trove *TweetTrove) FetchTombstoneUsers() { /** * Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users. - * To be called after calling "scraper.GetUser" on all the tombstoned users. * * At this point, those users should have been added to this trove's Users collection, and the