Make command line accept either URL or just ID to find a tweet

This commit is contained in:
Alessio 2021-09-27 13:49:17 -07:00
parent 1dd02ff390
commit b17e0821bb
3 changed files with 34 additions and 20 deletions

View File

@ -39,7 +39,7 @@ test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 12614833
test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "0" test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "0"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "0" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "0"
test $(find images | wc -l) = "1" test $(find images | wc -l) = "1"
tw download_tweet_content 1261483383483293700 tw download_tweet_content https://twitter.com/Denlesks/status/1261483383483293700
test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "0" test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "0"
test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "4" test $(sqlite3 twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "4"
test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "1" test $(sqlite3 twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "1"
@ -89,6 +89,11 @@ tw --profile data fetch_user elonmusk
test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1" test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1"
cd data cd data
# Test that fetching tweets with ID only (not full URL) works
test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "0" # Check it's not already there
tw fetch_tweet 1433713164546293767
test $(sqlite3 twitter.db "select count(*) from tweets where id = 1433713164546293767") = "1" # Should be there now
# Get a user's feed # Get a user's feed
malice_id=$(sqlite3 twitter.db "select id from users where handle='michaelmalice'") malice_id=$(sqlite3 twitter.db "select id from users where handle='michaelmalice'")

View File

@ -7,6 +7,7 @@ import (
"offline_twitter/terminal_utils" "offline_twitter/terminal_utils"
"strings" "strings"
"strconv" "strconv"
"regexp"
) )
@ -33,11 +34,12 @@ This application downloads tweets from twitter and saves them in a SQLite databa
fetch_tweet fetch_tweet
fetch_tweet_only fetch_tweet_only
<TARGET> is the full URL of the tweet. <TARGET> is either the full URL of the tweet, or its ID.
If using "fetch_tweet_only", then only that specific tweet will be saved. "fetch_tweet" will save the whole thread including replies. If using "fetch_tweet_only", then only that specific tweet will be saved. "fetch_tweet" will save the whole thread including replies.
download_tweet_content download_tweet_content
<TARGET> is the ID of the tweet. Downloads videos and images embedded in the tweet. <TARGET> is either the full URL of the tweet, or its ID.
Downloads videos and images embedded in the tweet.
get_user_tweets get_user_tweets
get_user_tweets_all get_user_tweets_all
@ -69,14 +71,21 @@ func die(text string, display_help bool, exit_code int) {
* returns: the id at the end of the tweet: e.g., 1395882872729477131 * returns: the id at the end of the tweet: e.g., 1395882872729477131
*/ */
func extract_id_from(url string) (scraper.TweetID, error) { func extract_id_from(url string) (scraper.TweetID, error) {
parts := strings.Split(url, "/") var id_str string
if len(parts) != 6 {
return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts)) if regexp.MustCompile("^\\d+$").MatchString(url) {
id_str = url
} else {
parts := strings.Split(url, "/")
if len(parts) != 6 {
return 0, fmt.Errorf("Tweet format isn't right (%d)", len(parts))
}
if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
return 0, fmt.Errorf("Tweet format isn't right")
}
id_str = parts[5]
} }
if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" { id, err := strconv.Atoi(id_str)
return 0, fmt.Errorf("Tweet format isn't right")
}
id, err := strconv.Atoi(parts[5])
if err != nil { if err != nil {
return 0, err return 0, err
} }

View File

@ -2,7 +2,6 @@ package main
import ( import (
"os" "os"
"strconv"
"fmt" "fmt"
"flag" "flag"
"offline_twitter/scraper" "offline_twitter/scraper"
@ -55,7 +54,7 @@ func main() {
case "fetch_tweet_only": case "fetch_tweet_only":
fetch_tweet_only(target) fetch_tweet_only(target)
case "fetch_tweet": case "fetch_tweet":
fetch_full_tweet(target) fetch_tweet_conversation(target)
case "get_user_tweets": case "get_user_tweets":
fetch_user_feed(target, 50) fetch_user_feed(target, 50)
case "get_user_tweets_all": case "get_user_tweets_all":
@ -112,8 +111,8 @@ func fetch_user(handle scraper.UserHandle) {
* args: * args:
* - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131" * - tweet_identifier: either the full tweet URL, e.g., "https://twitter.com/michaelmalice/status/1395882872729477131", or just the tweet ID, e.g., "1395882872729477131"
*/ */
func fetch_tweet_only(tweet_url string) { func fetch_tweet_only(tweet_identifier string) {
tweet_id, err := extract_id_from(tweet_url) tweet_id, err := extract_id_from(tweet_identifier)
if err != nil { if err != nil {
die(err.Error(), false, -1) die(err.Error(), false, -1)
} }
@ -140,8 +139,8 @@ func fetch_tweet_only(tweet_url string) {
* args: * args:
* - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131" * - tweet_identifier: either the full tweet URL, e.g., "https://twitter.com/michaelmalice/status/1395882872729477131", or just the tweet ID, e.g., "1395882872729477131"
*/ */
func fetch_full_tweet(tweet_url string) { func fetch_tweet_conversation(tweet_identifier string) {
tweet_id, err := extract_id_from(tweet_url) tweet_id, err := extract_id_from(tweet_identifier)
if err != nil { if err != nil {
die(err.Error(), false, -1) die(err.Error(), false, -1)
} }
@ -216,12 +215,13 @@ func fetch_user_feed(handle string, how_many int) {
} }
func download_tweet_content(tweet_id string) { func download_tweet_content(tweet_identifier string) {
id, err := strconv.Atoi(tweet_id) tweet_id, err := extract_id_from(tweet_identifier)
if err != nil { if err != nil {
panic(err) die(err.Error(), false, -1)
} }
tweet, err := profile.GetTweetById(scraper.TweetID(id))
tweet, err := profile.GetTweetById(tweet_id)
if err != nil { if err != nil {
panic("Couldn't get tweet from database: " + err.Error()) panic("Couldn't get tweet from database: " + err.Error())
} }