Add twitter command download_tweet_content

This commit is contained in:
Alessio 2021-08-07 17:06:39 -07:00
parent 9734c09426
commit 3de47f6bde
6 changed files with 68 additions and 25 deletions

View File

@ -69,17 +69,3 @@ tasks:
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
# Task "compile": build the twitter binary from twitter_offline_engine/cmd, move it into
# /usr/local/bin, and print the elapsed time (measured with bash's SECONDS counter).
- compile: |
SECONDS=0
cd twitter_offline_engine/cmd
go build -o ../../twitter ./twitter
cd ../..
chmod +x twitter
sudo mv twitter /usr/local/bin
which twitter
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
coverage.out

2
cmd/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
data
tw

7
cmd/compile.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Build the ./twitter package into an executable named "tw" in the current directory.

# Print each command before it runs
set -x
# Stop at the first command that exits with a non-zero status
set -e
# Compile the ./twitter package; the output binary is written to ./tw
go build -o tw ./twitter
# Make sure the resulting binary carries the executable bit
chmod +x tw

View File

@ -5,36 +5,64 @@ set -x
PS4='+(${BASH_SOURCE}:${LINENO}): '
./compile.sh
test -e data && rm -r data
go run ./twitter create_profile data
./tw create_profile data
# Fetch a user
go run ./twitter fetch_user data Denlesks
./tw fetch_user data Denlesks
test $(sqlite3 data/twitter.db "select handle from users") = "Denlesks"
test $(sqlite3 data/twitter.db "select count(*) from users") = "1"
go run ./twitter fetch_user data Denlesks
./tw fetch_user data Denlesks
test $(sqlite3 data/twitter.db "select count(*) from users") = "1"
# Fetch a tweet with images
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
./tw fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test $(sqlite3 data/twitter.db "select count(*) from tweets") = "1"
test "$(sqlite3 data/twitter.db "select text from tweets")" = "These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do"
test $(sqlite3 data/twitter.db "select count(*) from images") = "4"
# Download its images
test $(sqlite3 data/twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "4"
test $(sqlite3 data/twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "0"
test $(sqlite3 data/twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "0"
test $(find data/images | wc -l) = "1"
./tw download_tweet_content data 1261483383483293700
test $(sqlite3 data/twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 0") = "0"
test $(sqlite3 data/twitter.db "select count(*) from images where tweet_id = 1261483383483293700 and is_downloaded = 1") = "4"
test $(sqlite3 data/twitter.db "select is_content_downloaded from tweets where id = 1261483383483293700") = "1"
test $(find data/images | wc -l) = "5"
# Try to double-download it
go run ./twitter fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
./tw fetch_tweet_only data https://twitter.com/Denlesks/status/1261483383483293700
test $(sqlite3 data/twitter.db "select count(*) from tweets") = "1"
test $(sqlite3 data/twitter.db "select count(*) from images") = "4"
# Fetch a tweet with a video
go run ./twitter fetch_user data DiamondChariots
./tw fetch_user data DiamondChariots
test $(sqlite3 data/twitter.db "select handle from users" | wc -l) = "2"
go run ./twitter fetch_tweet_only data https://twitter.com/DiamondChariots/status/1418971605674467340
test $(sqlite3 data/twitter.db "select count(*) from tweets") = "2"
# Try to double-download it
go run ./twitter fetch_tweet_only data https://twitter.com/DiamondChariots/status/1418971605674467340
./tw fetch_tweet_only data https://twitter.com/DiamondChariots/status/1418971605674467340
test $(sqlite3 data/twitter.db "select count(*) from tweets") = "2"
test $(sqlite3 data/twitter.db "select count(*) from videos") = "1"
# Download the video
test $(sqlite3 data/twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "1"
test $(sqlite3 data/twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "0"
test $(sqlite3 data/twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "0"
test $(find data/videos| wc -l) = "1"
./tw download_tweet_content data 1418971605674467340
test $(sqlite3 data/twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 0") = "0"
test $(sqlite3 data/twitter.db "select count(*) from videos where tweet_id = 1418971605674467340 and is_downloaded = 1") = "1"
test $(sqlite3 data/twitter.db "select is_content_downloaded from tweets where id = 1418971605674467340") = "1"
test $(find data/videos | wc -l) = "2"
# Try to double-download it
./tw fetch_tweet_only data https://twitter.com/DiamondChariots/status/1418971605674467340
test $(sqlite3 data/twitter.db "select count(*) from tweets") = "2"
test $(sqlite3 data/twitter.db "select count(*) from videos") = "1"
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"

View File

@ -2,6 +2,7 @@ package main
import (
"os"
"strconv"
"fmt"
"offline_twitter/scraper"
"offline_twitter/persistence"
@ -48,6 +49,8 @@ func main() {
fetch_user(scraper.UserHandle(target))
case "fetch_tweet_only":
fetch_tweet_only(target)
case "download_tweet_content":
download_tweet_content(target)
default:
die("Invalid operation: " + operation, true, 3)
}
@ -116,3 +119,19 @@ func fetch_tweet_only(tweet_url string) {
}
fmt.Println("Saved the tweet. Exiting successfully")
}
// download_tweet_content looks up a previously saved tweet by its numeric ID
// and asks the profile to download that tweet's content.
//
// args:
// - tweet_id: the tweet's ID as a base-10 string (e.g. "1261483383483293700")
//
// Panics if the ID is not a valid 64-bit integer, if the tweet cannot be
// loaded from the database, or if downloading its content fails.
func download_tweet_content(tweet_id string) {
	// Tweet IDs do not fit in 32 bits. strconv.Atoi returns a platform-sized
	// int and reports a range error on 32-bit builds, so parse explicitly as
	// a 64-bit integer instead.
	id, err := strconv.ParseInt(tweet_id, 10, 64)
	if err != nil {
		panic(err)
	}

	tweet, err := profile.GetTweetById(scraper.TweetID(id))
	if err != nil {
		panic("Couldn't get tweet from database: " + err.Error())
	}

	err = profile.DownloadTweetContentFor(&tweet)
	if err != nil {
		panic("Error getting content: " + err.Error())
	}
}