Make fetching very aggressive by default
This commit is contained in:
parent
66daacec34
commit
c219652dcc
@ -137,8 +137,9 @@ test $(sqlite3 twitter.db "select is_private from users where handle = 'HbdNrx'"
|
|||||||
|
|
||||||
|
|
||||||
# Test tweets with URLs
|
# Test tweets with URLs
|
||||||
|
tw fetch_user CovfefeAnon
|
||||||
urls_count=$(sqlite3 twitter.db "select count(*) from urls")
|
urls_count=$(sqlite3 twitter.db "select count(*) from urls")
|
||||||
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433
|
tw fetch_tweet_only https://twitter.com/CovfefeAnon/status/1428904664645394433
|
||||||
urls_count_after=$(sqlite3 twitter.db "select count(*) from urls")
|
urls_count_after=$(sqlite3 twitter.db "select count(*) from urls")
|
||||||
test $urls_count_after = $(($urls_count + 1))
|
test $urls_count_after = $(($urls_count + 1))
|
||||||
test "$(sqlite3 twitter.db "select title from urls where tweet_id = 1428904664645394433")" = "Justice Department investigating Elon Musk's SpaceX following complaint of hiring discrimination"
|
test "$(sqlite3 twitter.db "select title from urls where tweet_id = 1428904664645394433")" = "Justice Department investigating Elon Musk's SpaceX following complaint of hiring discrimination"
|
||||||
@ -147,7 +148,7 @@ thumbnail_name=$(sqlite3 twitter.db "select thumbnail_remote_url from urls where
|
|||||||
test -n "$thumbnail_name" # Not testing for what the thumbnail url is because it keeps changing
|
test -n "$thumbnail_name" # Not testing for what the thumbnail url is because it keeps changing
|
||||||
|
|
||||||
# Try to double-fetch it; shouldn't duplicate the URL
|
# Try to double-fetch it; shouldn't duplicate the URL
|
||||||
tw fetch_tweet https://twitter.com/CovfefeAnon/status/1428904664645394433
|
tw fetch_tweet_only https://twitter.com/CovfefeAnon/status/1428904664645394433
|
||||||
urls_count_after_2x=$(sqlite3 twitter.db "select count(*) from urls")
|
urls_count_after_2x=$(sqlite3 twitter.db "select count(*) from urls")
|
||||||
test $urls_count_after_2x = $urls_count_after
|
test $urls_count_after_2x = $urls_count_after
|
||||||
|
|
||||||
@ -163,7 +164,8 @@ test -f link_preview_images/${thumbnail_name}_800x320_1.jpg
|
|||||||
|
|
||||||
|
|
||||||
# Test a tweet with a URL but no thumbnail
|
# Test a tweet with a URL but no thumbnail
|
||||||
tw fetch_tweet https://twitter.com/Xirong7/status/1413665734866186243
|
tw fetch_user Xirong7
|
||||||
|
tw fetch_tweet_only https://twitter.com/Xirong7/status/1413665734866186243
|
||||||
test $(sqlite3 twitter.db "select is_content_downloaded from urls where tweet_id = 1413665734866186243") = "0"
|
test $(sqlite3 twitter.db "select is_content_downloaded from urls where tweet_id = 1413665734866186243") = "0"
|
||||||
test $(sqlite3 twitter.db "select has_thumbnail from urls where tweet_id = 1413665734866186243") = "0"
|
test $(sqlite3 twitter.db "select has_thumbnail from urls where tweet_id = 1413665734866186243") = "0"
|
||||||
initial_link_preview_images_count=$(find link_preview_images | wc -l) # Check that it doesn't change, since there's no thumbnail
|
initial_link_preview_images_count=$(find link_preview_images | wc -l) # Check that it doesn't change, since there's no thumbnail
|
||||||
|
@ -155,19 +155,26 @@ func fetch_tweet_conversation(tweet_identifier string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, u := range users {
|
for _, u := range users {
|
||||||
fmt.Println(u)
|
// fmt.Println(u)
|
||||||
err = profile.SaveUser(u)
|
err = profile.SaveUser(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
die("Error saving tweet: " + err.Error(), false, 4)
|
die("Error saving user: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
err = profile.DownloadUserContentFor(&u)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting user content: " + err.Error(), false, 10)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, t := range tweets {
|
for _, t := range tweets {
|
||||||
fmt.Println(t)
|
|
||||||
err = profile.SaveTweet(t)
|
err = profile.SaveTweet(t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
die("Error saving tweet: " + err.Error(), false, 4)
|
die("Error saving tweet: " + err.Error(), false, 4)
|
||||||
}
|
}
|
||||||
|
err = profile.DownloadTweetContentFor(&t)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting tweet content: " + err.Error(), false, 11)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fmt.Printf("Saved %d tweets and %d users. Exiting successfully\n", len(tweets), len(users))
|
fmt.Printf("Saved %d tweets and %d users. Exiting successfully\n", len(tweets), len(users))
|
||||||
}
|
}
|
||||||
@ -193,7 +200,11 @@ func fetch_user_feed(handle string, how_many int) {
|
|||||||
for _, u := range users {
|
for _, u := range users {
|
||||||
err = profile.SaveUser(u)
|
err = profile.SaveUser(u)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
die("Error saving tweet: " + err.Error(), false, 4)
|
die("Error saving user: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
err = profile.DownloadUserContentFor(&u)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting user content: " + err.Error(), false, 10)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,6 +213,10 @@ func fetch_user_feed(handle string, how_many int) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
die("Error saving tweet: " + err.Error(), false, 4)
|
die("Error saving tweet: " + err.Error(), false, 4)
|
||||||
}
|
}
|
||||||
|
err = profile.DownloadTweetContentFor(&t)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting tweet content: " + err.Error(), false, 11)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range retweets {
|
for _, r := range retweets {
|
||||||
|
@ -6,6 +6,7 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"net/http"
|
"net/http"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"offline_twitter/scraper"
|
"offline_twitter/scraper"
|
||||||
)
|
)
|
||||||
@ -150,6 +151,10 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
|||||||
outfile = path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath)
|
outfile = path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath)
|
||||||
err = downloader.Curl(u.BannerImageUrl, outfile)
|
err = downloader.Curl(u.BannerImageUrl, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), "404 Not Found") {
|
||||||
|
// Some banner URLs return 404 unless a size suffix is appended (reason unknown), so retry once with "/600x200".
|
||||||
|
err = downloader.Curl(u.BannerImageUrl + "/600x200", outfile)
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user