From b17e0821bb92c8781624f1597fbd3f33e99a818e Mon Sep 17 00:00:00 2001 From: Alessio Date: Mon, 27 Sep 2021 13:49:17 -0700 Subject: [PATCH] Make command line accept either URL or just ID to find a tweet --- cmd/tests.sh | 7 ++++++- cmd/twitter/helpers.go | 27 ++++++++++++++++++--------- cmd/twitter/main.go | 20 ++++++++++---------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/cmd/tests.sh b/cmd/tests.sh index c0aa528..cb0aa8c 100755 --- a/cmd/tests.sh +++ b/cmd/tests.sh @@ -39,7 +39,7 @@ test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 12614833 test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "0" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "0" test $(find images | wc -l) = "1" -tw download_tweet_content 1261483383483293700 +tw download_tweet_content https://twitter.com/Denlesks/status/1261483383483293700 test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "0" test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "4" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "1" @@ -89,6 +89,11 @@ tw --profile data fetch_user elonmusk test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1" cd data +# Test that fetching tweets with ID only (not full URL) works +test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "0" # Check it's not already there +tw fetch_tweet 1433713164546293767 +test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "1" # Should be there now + # Get a user's feed malice_id=$(sqlite3 twitter.db "select id from users where handle='michaelmalice'") diff --git a/cmd/twitter/helpers.go b/cmd/twitter/helpers.go index 5a7d622..47c4873 100644 --- a/cmd/twitter/helpers.go +++ b/cmd/twitter/helpers.go @@ -7,6 +7,7 @@ import ( "offline_twitter/terminal_utils" "strings" "strconv" + "regexp" ) @@ -33,11 +34,12 @@ This application downloads tweets from twitter and saves them in a SQLite databa fetch_tweet fetch_tweet_only - is the full URL of the tweet. + is either the full URL of the tweet, or its ID. If using "fetch_tweet_only", then only that specific tweet will be saved. "fetch_tweet" will save the whole thread including replies. download_tweet_content - is the ID of the tweet. Downloads videos and images embedded in the tweet. + is either the full URL of the tweet, or its ID. + Downloads videos and images embedded in the tweet. get_user_tweets get_user_tweets_all @@ -69,14 +71,21 @@ func die(text string, display_help bool, exit_code int) { * returns: the id at the end of the tweet: e.g., 1395882872729477131 */ func extract_id_from(url string) (scraper.TweetID, error) { - parts := strings.Split(url, "/") - if len(parts) != 6 { - return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts)) + var id_str string + + if regexp.MustCompile("^\\d+$").MatchString(url) { + id_str = url + } else { + parts := strings.Split(url, "/") + if len(parts) != 6 { + return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts)) + } + if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" { + return 0, fmt.Errorf("Tweet format isn't right") + } + id_str = parts[5] } - if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" { - return 0, fmt.Errorf("Tweet format isn't right") - } - id, err := strconv.Atoi(parts[5]) + id, err := strconv.Atoi(id_str) if err != nil { return 0, err } diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 1ffd6cb..899351b 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -2,7 +2,6 @@ package main import ( "os" - "strconv" "fmt" "flag" "offline_twitter/scraper" @@ -55,7 +54,7 @@ func main() { case "fetch_tweet_only": fetch_tweet_only(target) case "fetch_tweet": - fetch_full_tweet(target) + fetch_tweet_conversation(target) case "get_user_tweets": fetch_user_feed(target, 50) case "get_user_tweets_all": @@ -112,8 +111,8 @@ func fetch_user(handle scraper.UserHandle) { * args: * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131" */ -func fetch_tweet_only(tweet_url string) { - tweet_id, err := extract_id_from(tweet_url) +func fetch_tweet_only(tweet_identifier string) { + tweet_id, err := extract_id_from(tweet_identifier) if err != nil { die(err.Error(), false, -1) } @@ -140,8 +139,8 @@ func fetch_tweet_only(tweet_url string) { * args: * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131" */ -func fetch_full_tweet(tweet_url string) { - tweet_id, err := extract_id_from(tweet_url) +func fetch_tweet_conversation(tweet_identifier string) { + tweet_id, err := extract_id_from(tweet_identifier) if err != nil { die(err.Error(), false, -1) } @@ -216,12 +215,13 @@ func fetch_user_feed(handle string, how_many int) { } -func download_tweet_content(tweet_id string) { - id, err := strconv.Atoi(tweet_id) +func download_tweet_content(tweet_identifier string) { + tweet_id, err := extract_id_from(tweet_identifier) if err != nil { - panic(err) + die(err.Error(), false, -1) } - tweet, err := profile.GetTweetById(scraper.TweetID(id)) + + tweet, err := profile.GetTweetById(tweet_id) if err != nil { panic("Couldn't get tweet from database: " + err.Error()) }