Make command line accept either URL or just ID to find a tweet

2021-09-27 13:49:17 -07:00 · 2021-09-27 13:49:17 -07:00 · b17e0821bb
commit b17e0821bb
parent 1dd02ff390
3 changed files with 34 additions and 20 deletions
--- a/cmd/tests.sh
+++ b/cmd/tests.sh
@@ -39,7 +39,7 @@ test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 12614833
 test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "0"
 test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "0"
 test $(find images | wc -l) = "1"
-tw download_tweet_content 1261483383483293700
+tw download_tweet_content https://twitter.com/Denlesks/status/1261483383483293700
 test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "0"
 test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "4"
 test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "1"
@@ -89,6 +89,11 @@ tw --profile data fetch_user elonmusk
 test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1"
 cd data

+# Test that fetching tweets with ID only (not full URL) works
+test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "0"  # Check it's not already there
+tw fetch_tweet 1433713164546293767
+test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "1"  # Should be there now
+

 # Get a user's feed
 malice_id=$(sqlite3 twitter.db "select id from users where handle='michaelmalice'")
--- a/cmd/twitter/helpers.go
+++ b/cmd/twitter/helpers.go
@@ -7,6 +7,7 @@ import (
 	"offline_twitter/terminal_utils"
 	"strings"
 	"strconv"
+	"regexp"
 )


@@ -33,11 +34,12 @@ This application downloads tweets from twitter and saves them in a SQLite databa

    fetch_tweet
    fetch_tweet_only
-          <TARGET> is the full URL of the tweet.
+          <TARGET> is either the full URL of the tweet, or its ID.
          If using "fetch_tweet_only", then only that specific tweet will be saved.  "fetch_tweet" will save the whole thread including replies.

    download_tweet_content
-          <TARGET> is the ID of the tweet.  Downloads videos and images embedded in the tweet.
+          <TARGET> is either the full URL of the tweet, or its ID.
+          Downloads videos and images embedded in the tweet.

    get_user_tweets
    get_user_tweets_all
@@ -69,14 +71,21 @@ func die(text string, display_help bool, exit_code int) {
 * returns: the id at the end of the tweet: e.g., 1395882872729477131
 */
 func extract_id_from(url string) (scraper.TweetID, error) {
-	parts := strings.Split(url, "/")
-	if len(parts) != 6 {
-		return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts))
+	var id_str string
+
+	if regexp.MustCompile("^\\d+$").MatchString(url) {
+		id_str = url
+	} else {
+		parts := strings.Split(url, "/")
+		if len(parts) != 6 {
+			return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts))
+		}
+		if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
+			return 0, fmt.Errorf("Tweet format isn't right")
+		}
+		id_str = parts[5]
 	}
-	if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
-		return 0, fmt.Errorf("Tweet format isn't right")
-	}
-	id, err := strconv.Atoi(parts[5])
+	id, err := strconv.Atoi(id_str)
 	if err != nil {
 		return 0, err
 	}
--- a/cmd/twitter/main.go
+++ b/cmd/twitter/main.go
@@ -2,7 +2,6 @@ package main

 import (
 	"os"
-	"strconv"
 	"fmt"
 	"flag"
 	"offline_twitter/scraper"
@@ -55,7 +54,7 @@ func main() {
 	case "fetch_tweet_only":
 		fetch_tweet_only(target)
 	case "fetch_tweet":
-		fetch_full_tweet(target)
+		fetch_tweet_conversation(target)
 	case "get_user_tweets":
 		fetch_user_feed(target, 50)
 	case "get_user_tweets_all":
@@ -112,8 +111,8 @@ func fetch_user(handle scraper.UserHandle) {
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
-func fetch_tweet_only(tweet_url string) {
-	tweet_id, err := extract_id_from(tweet_url)
+func fetch_tweet_only(tweet_identifier string) {
+	tweet_id, err := extract_id_from(tweet_identifier)
 	if err != nil {
 		die(err.Error(), false, -1)
 	}
@@ -140,8 +139,8 @@ func fetch_tweet_only(tweet_url string) {
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
-func fetch_full_tweet(tweet_url string) {
-	tweet_id, err := extract_id_from(tweet_url)
+func fetch_tweet_conversation(tweet_identifier string) {
+	tweet_id, err := extract_id_from(tweet_identifier)
 	if err != nil {
 		die(err.Error(), false, -1)
 	}
@@ -216,12 +215,13 @@ func fetch_user_feed(handle string, how_many int) {
 }


-func download_tweet_content(tweet_id string) {
-	id, err := strconv.Atoi(tweet_id)
+func download_tweet_content(tweet_identifier string) {
+	tweet_id, err := extract_id_from(tweet_identifier)
 	if err != nil {
-		panic(err)
+		die(err.Error(), false, -1)
 	}
-	tweet, err := profile.GetTweetById(scraper.TweetID(id))
+
+	tweet, err := profile.GetTweetById(tweet_id)
 	if err != nil {
 		panic("Couldn't get tweet from database: " + err.Error())
 	}