Enable 'fetch_tweet' full scrape

This commit is contained in:
Alessio 2021-08-22 17:55:21 -07:00
parent ea1264ddbd
commit a2554e9293
3 changed files with 53 additions and 3 deletions

View File

@ -22,6 +22,7 @@ test $(sqlite3 twitter.db "select count(*) from users") = "1"
tw fetch_user Denlesks tw fetch_user Denlesks
test $(sqlite3 twitter.db "select count(*) from users") = "1" test $(sqlite3 twitter.db "select count(*) from users") = "1"
# Fetch a tweet with images # Fetch a tweet with images
tw fetch_tweet_only https://twitter.com/Denlesks/status/1261483383483293700 tw fetch_tweet_only https://twitter.com/Denlesks/status/1261483383483293700
test $(sqlite3 twitter.db "select count(*) from tweets") = "1" test $(sqlite3 twitter.db "select count(*) from tweets") = "1"
@ -77,10 +78,18 @@ test -f profile_images/DiamondChariots_profile_rE4OTedS.jpg
test -f profile_images/DiamondChariots_banner_1615811094.jpg test -f profile_images/DiamondChariots_banner_1615811094.jpg
# Download a full thread
# Run the `fetch_tweet` command on a single tweet URL, then check the database
# for several tweet IDs and the handle of the user each one is attributed to.
tw fetch_tweet https://twitter.com/RememberAfghan1/status/1429585423702052867
# The tweet named in the URL itself must be saved and joined to its author.
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429585423702052867") = "RememberAfghan1"
# The remaining IDs are other tweets expected to be saved by the same command
# (presumably the rest of the thread -- confirm against the live conversation).
# Each query joins tweets to users, so it only passes if both rows were saved.
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429584239570391042") = "michaelmalice"
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429583672827465730") = "kanesays23"
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429587734054703113") = "TerraDionysus"
# Test that the `--profile` flag works # Test that the `--profile` flag works
cd .. cd ..
tw --profile data fetch_user michaelmalice tw --profile data fetch_user elonmusk
test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'michaelmalice'") = "1" test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1"
cd data cd data

View File

@ -54,6 +54,8 @@ func main() {
fetch_user(scraper.UserHandle(target)) fetch_user(scraper.UserHandle(target))
case "fetch_tweet_only": case "fetch_tweet_only":
fetch_tweet_only(target) fetch_tweet_only(target)
case "fetch_tweet":
fetch_full_tweet(target)
case "get_user_tweets": case "get_user_tweets":
fetch_user_feed(target) fetch_user_feed(target)
case "download_tweet_content": case "download_tweet_content":
@ -129,6 +131,45 @@ func fetch_tweet_only(tweet_url string) {
fmt.Println("Saved the tweet. Exiting successfully") fmt.Println("Saved the tweet. Exiting successfully")
} }
/**
 * Scrape a tweet and all associated info, and save it in the database.
 *
 * The tweet ID is extracted from the URL and passed to `scraper.GetTweetFull`.
 * Every user it returns is saved first, then every tweet. If the tweet is
 * already in the database, a notice is printed and the scrape proceeds anyway.
 *
 * Any failure is reported through `die`: code -1 for a bad URL or a scraper
 * error, code 4 for a failure while saving a user or a tweet.
 *
 * args:
 * - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
 */
func fetch_full_tweet(tweet_url string) {
	tweet_id, err := extract_id_from(tweet_url)
	if err != nil {
		die(err.Error(), false, -1)
	}

	// Purely informational: an existing tweet is re-scraped and saved again.
	if profile.IsTweetInDatabase(tweet_id) {
		fmt.Println("Tweet is already in database. Updating...")
	}

	// NOTE(review): the second return value is discarded here; confirm that
	// nothing in it needs to be persisted.
	tweets, _, users, err := scraper.GetTweetFull(tweet_id)
	if err != nil {
		die(err.Error(), false, -1)
	}

	// Users are saved before tweets (presumably so that each tweet's author row
	// already exists when the tweet is written -- confirm against the schema).
	for _, u := range users {
		fmt.Println(u)
		err = profile.SaveUser(u)
		if err != nil {
			// Report this as a user failure, not a tweet failure, so the
			// message points at the operation that actually failed.
			die("Error saving user: "+err.Error(), false, 4)
		}
	}

	for _, t := range tweets {
		fmt.Println(t)
		err = profile.SaveTweet(t)
		if err != nil {
			die("Error saving tweet: "+err.Error(), false, 4)
		}
	}

	fmt.Printf("Saved %d tweets and %d users. Exiting successfully\n", len(tweets), len(users))
}
/** /**
* Scrape a user feed and get a big blob of tweets and retweets. Get 50 tweets. * Scrape a user feed and get a big blob of tweets and retweets. Get 50 tweets.
* *

View File

@ -145,7 +145,7 @@ type UserResponse struct {
} `json:"data"` } `json:"data"`
Errors []struct { Errors []struct {
Message string `json:"message"` Message string `json:"message"`
Code int `json:"code,string"` Code int `json:"code"`
} `json:"errors"` } `json:"errors"`
} }
func (u UserResponse) ConvertToAPIUser() APIUser { func (u UserResponse) ConvertToAPIUser() APIUser {