Add woodpecker config, update tests due to an account getting banned

This commit is contained in:
Alessio 2022-10-14 22:44:02 -04:00
parent 94b8b551f8
commit b8b3b56c8e
6 changed files with 48 additions and 14 deletions

34
.woodpecker.yml Normal file
View File

@ -0,0 +1,34 @@
# All pipeline steps run in their own container, but the working directory (with the code) is on a
# shared volume that is mounted in every container, so filesystem changes made within the working
# directory persist between build steps.
pipeline:
  # Run the linters over the whole module.
  lint:
    image: offline-twitter/go
    commands:
      - golangci-lint run

  # Unit tests and benchmarks, with coverage output.
  test:
    image: offline-twitter/go
    commands:
      # `-p` keeps this step from failing if the directory already exists in the workspace.
      - mkdir -p persistence/test_profiles
      - go test -bench=. -cover ./...

  # Integration test script; it is run from inside the `cmd` directory.
  integration_test:
    image: offline-twitter/go
    commands:
      - cd cmd
      - ./tests.sh

  # Disabled: build a .deb for `release-*` branches and upload it to the apt repo.
  # dpkg_build_and_upload:
  #   when:
  #     branch: release-*
  #   image: offline-twitter/deploy
  #   commands:
  #     - export version=$(echo $CI_COMMIT_BRANCH | grep -Poh "(?<=^release-)\d+\.\d+\.\d+")
  #     - cd build
  #     - ./build_dpkg.sh $version
  #     - scp offline-twitter_$${version}_all.deb aptrepo@apt.playfulpachyderm.com:/apt-repo/test-repo
  #     - ssh aptrepo@apt.playfulpachyderm.com "cd ~/test-repo && ./update.sh"

View File

@ -58,27 +58,27 @@ test $(sqlite3 twitter.db "select count(*) from images") = "4"
# Fetch a tweet with a video
# (The expected user/tweet counts below include rows created earlier in this script --
# presumably one user and one tweet; that part is not shown here, so verify if they change.)
tw fetch_user SpaceX
test $(sqlite3 twitter.db "select handle from users" | wc -l) = "2"
tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432
test $(sqlite3 twitter.db "select count(*) from tweets") = "2"
test $(sqlite3 twitter.db "select count(*) from videos") = "1"

# Download the video
# Before downloading: the video row exists but is flagged as not downloaded.
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 0") = "1"
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 1") = "0"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1581025285524242432") = "0"
# `find` prints the starting directory itself, so a count of 1 means the directory is empty.
test $(find videos | wc -l) = "1"
test $(find video_thumbnails | wc -l) = "1"
tw download_tweet_content 1581025285524242432
# After downloading: the flags flip and exactly one file appears in each directory.
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 0") = "0"
test $(sqlite3 twitter.db "select count(*) from videos where tweet_id = 1581025285524242432 and is_downloaded = 1") = "1"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1581025285524242432") = "1"
test $(find videos | wc -l) = "2"
test $(find video_thumbnails | wc -l) = "2"

# Try to double-download it
# Fetching the same tweet again must not create duplicate tweet or video rows.
tw fetch_tweet_only https://twitter.com/SpaceX/status/1581025285524242432
test $(sqlite3 twitter.db "select count(*) from tweets") = "2"
test $(sqlite3 twitter.db "select count(*) from videos") = "1"

View File

@ -33,6 +33,7 @@ TODO: Problem tweets
- "account no longer exists" tombstone not being collected => https://twitter.com/michaelmalice/status/1461031030278742020
- tweet with warning label not getting scraped right: https://twitter.com/michaelmalice/status/1493324611999748098
- fails to produce any result for the first tweet in the thread => https://twitter.com/CovfefeAnon/status/1498877082838962181
- twitter fetch_tweet 1517683230421528576 => panic: ScreenName is empty!
TODO: videos-view-count
- videos don't parse properly in APIv2

View File

@ -391,9 +391,9 @@ func (api_response APIV2Response) ToTweetTrove() (TweetTrove, error) {
/**
 * Build the GraphQL "UserTweetsAndReplies" request URL for the given user.
 *
 * args:
 * - user_id: formatted with `fmt.Sprint` and embedded as the "userId" variable
 * - cursor: pagination cursor; if non-empty, it is query-escaped and embedded as the "cursor" variable
 *
 * returns: the full request URL, with the "variables" JSON payload already percent-encoded
 *
 * The two URLs are kept as separate literals because they differ in more than the cursor: the
 * cursor variant sets "includePromotedContent" to true and carries an extra
 * "__fs_responsive_web_uc_gql_enabled" flag.
 */
func get_graphql_user_timeline_url(user_id UserID, cursor string) string {
	if cursor != "" {
		return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22cursor%22%3A%22" + url.QueryEscape(cursor) + "%22%2C%22includePromotedContent%22%3Atrue%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_responsive_web_uc_gql_enabled%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
	}
	return "https://twitter.com/i/api/graphql/CwLU7qTfeu0doqhSr6tW4A/UserTweetsAndReplies?variables=%7B%22userId%22%3A%22" + fmt.Sprint(user_id) + "%22%2C%22count%22%3A40%2C%22includePromotedContent%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withVoice%22%3Atrue%2C%22withV2Timeline%22%3Afalse%2C%22__fs_interactive_text%22%3Afalse%2C%22__fs_dont_mention_me_view_api_enabled%22%3Afalse%7D" //nolint:lll // It's a URL, come on
}
/**

View File

@ -24,6 +24,6 @@ func TestGetGuestToken(t *testing.T) {
// other than the first use the cache.
func BenchmarkGetGuestToken(b *testing.B) {
	for i := 0; i < b.N; i++ {
		// Exactly one call per iteration, so the reported ns/op reflects a single call.
		GetGuestToken() //nolint:errcheck // Don't care about errors, just want to time it
	}
}

View File

@ -103,7 +103,6 @@ func (trove *TweetTrove) FetchTombstoneUsers() {
/**
* Checks for tombstoned tweets and fills in their UserIDs based on the collected tombstoned users.
* To be called after calling "scraper.GetUser" on all the tombstoned users.
*
* At this point, those users should have been added to this trove's Users collection, and the