2021-08-22 18:43:33 -07:00

241 lines
5.2 KiB
Go

package main
import (
"os"
"strconv"
"fmt"
"flag"
"offline_twitter/scraper"
"offline_twitter/persistence"
)
/**
 * Global variable referencing the open data profile.
 *
 * It is assigned in main() from persistence.LoadProfile before any operation other than
 * "create_profile" is dispatched; the operation handlers in this file read it directly.
 */
var profile persistence.Profile
/**
 * Main method.
 *
 * Expects an operation name followed by a target as the two positional arguments, optionally
 * preceded by the `-profile` flag (defaults to "."):
 *
 *     [-profile <dir>] <operation> <target>
 *
 * "create_profile" is handled first, before any profile is loaded.  For every other operation,
 * the profile is loaded from the `-profile` directory into the global `profile`, and then the
 * operation is dispatched to its handler.
 *
 * Problems are reported through die() (defined elsewhere in this file):
 * - too few arguments (codes 0 and 1)
 * - the profile fails to load (code 2)
 * - the operation name is not recognized (code 3)
 */
func main() {
	if len(os.Args) < 3 {
		die("", true, 0)
	}

	profile_dir := flag.String("profile", ".", "Path to the profile directory to load")
	flag.Parse()
	args := flag.Args()

	// After flag parsing, exactly an operation and a target must remain
	if len(args) < 2 {
		die("", true, 1)
	}

	operation := args[0]
	target := args[1]

	// Creating a profile has to happen before loading one, so it is not part of the switch below
	if operation == "create_profile" {
		create_profile(target)
		return
	}

	var err error
	profile, err = persistence.LoadProfile(*profile_dir)
	if err != nil {
		die("Could not load profile: "+err.Error(), true, 2)
	}

	switch operation {
	case "fetch_user":
		fetch_user(scraper.UserHandle(target))
	case "download_user_content":
		download_user_content(scraper.UserHandle(target))
	case "fetch_tweet_only":
		fetch_tweet_only(target)
	case "fetch_tweet":
		fetch_full_tweet(target)
	case "get_user_tweets":
		fetch_user_feed(target, 50)
	case "get_user_tweets_all":
		// Effectively "no limit": ask for far more tweets than any feed is expected to hold
		fetch_user_feed(target, 999999999)
	case "download_tweet_content":
		download_tweet_content(target)
	default:
		die("Invalid operation: "+operation, true, 3)
	}
}
/**
 * Set up a new data directory (profile) by calling persistence.NewProfile.
 * Panics with the returned error if the profile cannot be created.
 *
 * args:
 * - target_dir: where the new data dir should be placed.
 */
func create_profile(target_dir string) {
	if _, err := persistence.NewProfile(target_dir); err != nil {
		panic(err)
	}
}
/**
 * Scrape one user, print it, and store it in the profile's database.
 * A user that is already stored is scraped and saved again (a notice is printed first).
 *
 * args:
 * - handle: e.g., "michaelmalice"
 */
func fetch_user(handle scraper.UserHandle) {
	already_saved := profile.UserExists(handle)
	if already_saved {
		fmt.Println("User is already in database. Updating user...")
	}

	scraped_user, scrape_err := scraper.GetUser(handle)
	if scrape_err != nil {
		die(scrape_err.Error(), false, -1)
	}
	fmt.Println(scraped_user)

	if save_err := profile.SaveUser(scraped_user); save_err != nil {
		die("Error saving user: "+save_err.Error(), false, 4)
	}
	fmt.Println("Saved the user. Exiting successfully")
}
/**
 * Scrape one tweet (just the tweet itself), print it, and store it in the profile's database.
 * A tweet that is already stored is scraped and saved again (a notice is printed first).
 *
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
func fetch_tweet_only(tweet_url string) {
	id, parse_err := extract_id_from(tweet_url)
	if parse_err != nil {
		die(parse_err.Error(), false, -1)
	}

	if profile.IsTweetInDatabase(id) {
		fmt.Println("Tweet is already in database. Updating...")
	}

	scraped_tweet, scrape_err := scraper.GetTweet(id)
	if scrape_err != nil {
		die("Error fetching tweet: "+scrape_err.Error(), false, -1)
	}
	fmt.Println(scraped_tweet)

	if save_err := profile.SaveTweet(scraped_tweet); save_err != nil {
		die("Error saving tweet: "+save_err.Error(), false, 4)
	}
	fmt.Println("Saved the tweet. Exiting successfully")
}
/**
 * Scrape a tweet and all associated info, and save it in the database.
 *
 * Every user and tweet returned by scraper.GetTweetFull is printed and saved; users are saved
 * before tweets.  The second value returned by scraper.GetTweetFull is discarded.
 *
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
func fetch_full_tweet(tweet_url string) {
	tweet_id, err := extract_id_from(tweet_url)
	if err != nil {
		die(err.Error(), false, -1)
	}

	if profile.IsTweetInDatabase(tweet_id) {
		fmt.Println("Tweet is already in database. Updating...")
	}

	tweets, _, users, err := scraper.GetTweetFull(tweet_id)
	if err != nil {
		die(err.Error(), false, -1)
	}

	for _, u := range users {
		fmt.Println(u)
		err = profile.SaveUser(u)
		if err != nil {
			// Report the failure as a user-save failure (it was previously mislabelled as a tweet)
			die("Error saving user: "+err.Error(), false, 4)
		}
	}

	for _, t := range tweets {
		fmt.Println(t)
		err = profile.SaveTweet(t)
		if err != nil {
			die("Error saving tweet: "+err.Error(), false, 4)
		}
	}
	fmt.Printf("Saved %d tweets and %d users. Exiting successfully\n", len(tweets), len(users))
}
/**
 * Scrape a user feed and get a big blob of tweets and retweets, then save everything
 * (users first, then tweets, then retweets) in the database.
 *
 * The user is looked up in the database by handle first, because the feed is requested by
 * user ID.
 *
 * args:
 * - handle: the user handle to get
 * - how_many: the number of tweets to request; passed through to scraper.GetUserFeedFor
 */
func fetch_user_feed(handle string, how_many int) {
	user, err := profile.GetUserByHandle(scraper.UserHandle(handle))
	if err != nil {
		die(err.Error(), false, -1)
	}

	tweets, retweets, users, err := scraper.GetUserFeedFor(user.ID, how_many)
	if err != nil {
		die("Error scraping feed: "+err.Error(), false, -2)
	}

	for _, u := range users {
		err = profile.SaveUser(u)
		if err != nil {
			// Report the failure as a user-save failure (it was previously mislabelled as a tweet)
			die("Error saving user: "+err.Error(), false, 4)
		}
	}

	for _, t := range tweets {
		err = profile.SaveTweet(t)
		if err != nil {
			die("Error saving tweet: "+err.Error(), false, 4)
		}
	}

	for _, r := range retweets {
		err = profile.SaveRetweet(r)
		if err != nil {
			die("Error saving retweet: "+err.Error(), false, 4)
		}
	}

	fmt.Printf("Saved %d tweets, %d retweets and %d users. Exiting successfully\n", len(tweets), len(retweets), len(users))
}
/**
 * Download the content for a tweet that is already in the database, via
 * profile.DownloadTweetContentFor.
 *
 * Panics if the ID is not a valid integer, if the tweet cannot be read from the database, or
 * if the download fails.
 *
 * args:
 * - tweet_id: the numeric tweet ID as a decimal string, e.g., "1395882872729477131"
 *   (note: an ID, not a tweet URL)
 */
func download_tweet_content(tweet_id string) {
	// Tweet IDs need 64 bits.  strconv.Atoi produces a platform-sized int, which rejects such
	// IDs with a range error wherever int is 32 bits wide, so parse as a 64-bit value explicitly.
	id, err := strconv.ParseInt(tweet_id, 10, 64)
	if err != nil {
		panic(err)
	}

	tweet, err := profile.GetTweetById(scraper.TweetID(id))
	if err != nil {
		panic("Couldn't get tweet from database: " + err.Error())
	}

	err = profile.DownloadTweetContentFor(&tweet)
	if err != nil {
		panic("Error getting content: " + err.Error())
	}
}
/**
 * Download the content for a user that is already in the database, via
 * profile.DownloadUserContentFor.
 *
 * Panics if the user cannot be read from the database or if the download fails.
 *
 * args:
 * - handle: the handle of the user whose content should be downloaded
 */
func download_user_content(handle scraper.UserHandle) {
	stored_user, lookup_err := profile.GetUserByHandle(handle)
	if lookup_err != nil {
		panic("Couldn't get the user from database: " + lookup_err.Error())
	}

	if download_err := profile.DownloadUserContentFor(&stored_user); download_err != nil {
		panic("Error getting content: " + download_err.Error())
	}
}