Enable 'fetch_tweet' full scrape
This commit is contained in:
parent
ea1264ddbd
commit
a2554e9293
13
cmd/tests.sh
13
cmd/tests.sh
@ -22,6 +22,7 @@ test $(sqlite3 twitter.db "select count(*) from users") = "1"
|
||||
tw fetch_user Denlesks
|
||||
test $(sqlite3 twitter.db "select count(*) from users") = "1"
|
||||
|
||||
|
||||
# Fetch a tweet with images
|
||||
tw fetch_tweet_only https://twitter.com/Denlesks/status/1261483383483293700
|
||||
test $(sqlite3 twitter.db "select count(*) from tweets") = "1"
|
||||
@ -77,10 +78,18 @@ test -f profile_images/DiamondChariots_profile_rE4OTedS.jpg
|
||||
test -f profile_images/DiamondChariots_banner_1615811094.jpg
|
||||
|
||||
|
||||
# Download a full thread
|
||||
tw fetch_tweet https://twitter.com/RememberAfghan1/status/1429585423702052867
|
||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429585423702052867") = "RememberAfghan1"
|
||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429584239570391042") = "michaelmalice"
|
||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429583672827465730") = "kanesays23"
|
||||
test $(sqlite3 twitter.db "select handle from tweets join users on tweets.user_id = users.id where tweets.id=1429587734054703113") = "TerraDionysus"
|
||||
|
||||
|
||||
# Test that the `--profile` flag works
|
||||
cd ..
|
||||
tw --profile data fetch_user michaelmalice
|
||||
test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'michaelmalice'") = "1"
|
||||
tw --profile data fetch_user elonmusk
|
||||
test $(sqlite3 data/twitter.db "select count(*) from users where handle = 'elonmusk'") = "1"
|
||||
cd data
|
||||
|
||||
|
||||
|
@ -54,6 +54,8 @@ func main() {
|
||||
fetch_user(scraper.UserHandle(target))
|
||||
case "fetch_tweet_only":
|
||||
fetch_tweet_only(target)
|
||||
case "fetch_tweet":
|
||||
fetch_full_tweet(target)
|
||||
case "get_user_tweets":
|
||||
fetch_user_feed(target)
|
||||
case "download_tweet_content":
|
||||
@ -129,6 +131,45 @@ func fetch_tweet_only(tweet_url string) {
|
||||
fmt.Println("Saved the tweet. Exiting successfully")
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape a tweet and all associated info, and save it in the database.
|
||||
*
|
||||
* args:
|
||||
* - tweet_url: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
|
||||
*/
|
||||
func fetch_full_tweet(tweet_url string) {
|
||||
tweet_id, err := extract_id_from(tweet_url)
|
||||
if err != nil {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
|
||||
if profile.IsTweetInDatabase(tweet_id) {
|
||||
fmt.Println("Tweet is already in database. Updating...")
|
||||
}
|
||||
|
||||
tweets, _, users, err := scraper.GetTweetFull(tweet_id)
|
||||
if err != nil {
|
||||
die(err.Error(), false, -1)
|
||||
}
|
||||
|
||||
for _, u := range users {
|
||||
fmt.Println(u)
|
||||
err = profile.SaveUser(u)
|
||||
if err != nil {
|
||||
die("Error saving tweet: " + err.Error(), false, 4)
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range tweets {
|
||||
fmt.Println(t)
|
||||
err = profile.SaveTweet(t)
|
||||
if err != nil {
|
||||
die("Error saving tweet: " + err.Error(), false, 4)
|
||||
}
|
||||
}
|
||||
fmt.Printf("Saved %d tweets and %d users. Exiting successfully\n", len(tweets), len(users))
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrape a user feed and get a big blob of tweets and retweets. Get 50 tweets.
|
||||
*
|
||||
|
@ -145,7 +145,7 @@ type UserResponse struct {
|
||||
} `json:"data"`
|
||||
Errors []struct {
|
||||
Message string `json:"message"`
|
||||
Code int `json:"code,string"`
|
||||
Code int `json:"code"`
|
||||
} `json:"errors"`
|
||||
}
|
||||
func (u UserResponse) ConvertToAPIUser() APIUser {
|
||||
|
Loading…
x
Reference in New Issue
Block a user