From 08d703a26fdd00f6b819175f528fc1cd93851367 Mon Sep 17 00:00:00 2001 From: Alessio Date: Thu, 19 Aug 2021 12:54:08 -0700 Subject: [PATCH] Add command to get a user feed --- cmd/tests.sh | 18 +++++++++++++++++ cmd/twitter/helpers.go | 1 + cmd/twitter/main.go | 44 ++++++++++++++++++++++++++++++++++++++++++ persistence/schema.sql | 4 ++-- 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/cmd/tests.sh b/cmd/tests.sh index 815117a..cd9ea2d 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -81,5 +81,23 @@ test -f profile_images/DiamondChariots_banner_1615811094.jpg cd .. tw --profile data fetch_user michaelmalice test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'michaelmalice'") = "1" +cd data + + +# Get a user's feed +malice_id=$(sqlite3 twitter.db "select id from users where handle='michaelmalice'") +test $(sqlite3 twitter.db "select count(*) from retweets") = "0" +tweet_count_1=$(sqlite3 twitter.db "select count(*) from tweets") +tw get_user_tweets michaelmalice + +# Check that there are some retweets +rts_count=$(sqlite3 twitter.db "select count(*) from retweets") +test $rts_count -gt "0" + +# Check that new retweets plus new tweets > 50 +tweet_count_2=$(sqlite3 twitter.db "select count(*) from tweets") +test $(sqlite3 twitter.db "select count(*) from retweets where retweeted_by != $malice_id") = "0" +test $(($rts_count + $tweet_count_2 - $tweet_count_1)) -gt "50" + echo -e "\033[32mAll tests passed. 
Finished successfully.\033[0m"
diff --git a/cmd/twitter/helpers.go b/cmd/twitter/helpers.go
index a9034c4..d42a302 100644
--- a/cmd/twitter/helpers.go
+++ b/cmd/twitter/helpers.go
@@ -21,6 +21,7 @@ const help_message = `Usage: twitter [--profile ] is the user handle)
     - fetch_tweet_only ( is the full URL of the tweet)
+    - get_user_tweets ( is the user handle whomst feed to get)
     - download_tweet_content ( is the ID of the tweet whomst contents to download / back up)
     - download_user_content ( is the user handle of the user whomst banner image and profile to download / back up)
diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go
index 92a3386..b9f690a 100644
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -54,6 +54,8 @@ func main() {
 		fetch_user(scraper.UserHandle(target))
 	case "fetch_tweet_only":
 		fetch_tweet_only(target)
+	case "get_user_tweets":
+		fetch_user_feed(target)
 	case "download_tweet_content":
 		download_tweet_content(target)
 	case "download_user_content":
@@ -127,6 +129,48 @@ func fetch_tweet_only(tweet_url string) {
 	fmt.Println("Saved the tweet. Exiting successfully")
 }
 
+/**
+ * Scrape a user's feed (asking for 50 tweets) and save the users, tweets and retweets it returns.
+ * The user is looked up in the profile by handle; any lookup, scrape or save error goes to `die`.
+ * args:
+ * - handle: the user handle whose feed to get
+ */
+func fetch_user_feed(handle string) {
+	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
+	if err != nil {
+		die(err.Error(), false, -1)
+	}
+
+	tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, 50)
+	if err != nil {
+		die(err.Error(), false, -2)
+	}
+
+	for _, u := range users {
+		fmt.Println(u)
+		err = profile.SaveUser(u)
+		if err != nil {
+			die("Error saving user: " + err.Error(), false, 4)
+		}
+	}
+
+	for _, t := range tweets {
+		fmt.Println(t)
+		err = profile.SaveTweet(t)
+		if err != nil {
+			die("Error saving tweet: " + err.Error(), false, 4)
+		}
+	}
+
+	for _, r := range retweets {
+		fmt.Println(r)
+		err = profile.SaveRetweet(r)
+		if err != nil {
+			die("Error saving retweet: " + err.Error(), false, 4)
+		}
+	}
+	fmt.Printf("Saved %d tweets, %d retweets and %d users. Exiting successfully\n", len(tweets), len(retweets), len(users))
+}
 
 func download_tweet_content(tweet_id string) {
 	id, err := strconv.Atoi(tweet_id)
diff --git a/persistence/schema.sql b/persistence/schema.sql
index 70207d4..ffad3ce 100644
--- a/persistence/schema.sql
+++ b/persistence/schema.sql
@@ -32,8 +32,8 @@ create table tweets (rowid integer primary key,
     num_retweets integer,
     num_replies integer,
     num_quote_tweets integer,
-    in_reply_to integer,
-    quoted_tweet integer,
+    in_reply_to integer, -- TODO hungarian: should be `in_reply_to_id`
+    quoted_tweet integer, -- TODO hungarian: should be `quoted_tweet_id`
     mentions text, -- comma-separated
     hashtags text, -- comma-separated