diff --git a/cmd/twitter/helpers.go b/cmd/twitter/helpers.go index c2eadf5..d5f213a 100644 --- a/cmd/twitter/helpers.go +++ b/cmd/twitter/helpers.go @@ -3,14 +3,14 @@ package main import ( "fmt" "os" + "regexp" + "strconv" + "strings" + "offline_twitter/scraper" "offline_twitter/terminal_utils" - "strings" - "strconv" - "regexp" ) - /** * Help message to print if command syntax is incorrect */ @@ -73,13 +73,13 @@ This application downloads tweets from twitter and saves them in a SQLite databa won't count toward the limit. ` - /** * Helper function */ func die(text string, display_help bool, exit_code int) { if text != "" { - fmt.Fprint(os.Stderr, terminal_utils.COLOR_RED + text + terminal_utils.COLOR_RESET + "\n") + outstring := terminal_utils.COLOR_RED + text + terminal_utils.COLOR_RESET + "\n" + fmt.Fprint(os.Stderr, outstring) } if display_help { fmt.Fprint(os.Stderr, help_message) @@ -91,8 +91,8 @@ func die(text string, display_help bool, exit_code int) { * Print a happy exit message and exit */ func happy_exit(text string) { - fmt.Printf(terminal_utils.COLOR_GREEN + text + terminal_utils.COLOR_RESET + "\n") - fmt.Printf(terminal_utils.COLOR_GREEN + "Exiting successfully." + terminal_utils.COLOR_RESET + "\n") + fmt.Printf(terminal_utils.COLOR_GREEN + text + terminal_utils.COLOR_RESET + "\n") + fmt.Printf(terminal_utils.COLOR_GREEN + "Exiting successfully." + terminal_utils.COLOR_RESET + "\n") } /** diff --git a/cmd/twitter/main.go b/cmd/twitter/main.go index 9d1e64d..3c2565d 100644 --- a/cmd/twitter/main.go +++ b/cmd/twitter/main.go @@ -1,14 +1,14 @@ package main import ( - "os" - "fmt" "flag" + "fmt" + "os" log "github.com/sirupsen/logrus" - "offline_twitter/scraper" "offline_twitter/persistence" + "offline_twitter/scraper" ) /** @@ -87,10 +87,10 @@ func main() { profile, err = persistence.LoadProfile(*profile_dir) if err != nil { - die("Could not load profile: " + err.Error(), true, 2) + die(fmt.Sprintf("Could not load profile: %s", err.Error()), true, 2) } - switch (operation) { + switch operation { case "create_profile": create_profile(target) case "fetch_user": @@ -116,7 +116,7 @@ func main() { case "list_followed": list_followed() default: - die("Invalid operation: " + operation, true, 3) + die(fmt.Sprintf("Invalid operation: %s", operation), true, 3) } } @@ -148,10 +148,10 @@ func fetch_user(handle scraper.UserHandle) { err = profile.SaveUser(&user) if err != nil { - die("Error saving user: " + err.Error(), false, 4) + die(fmt.Sprintf("Error saving user: %s", err.Error()), false, 4) } - download_user_content(handle); + download_user_content(handle) happy_exit("Saved the user") } @@ -169,13 +169,13 @@ func fetch_tweet_only(tweet_identifier string) { tweet, err := scraper.GetTweet(tweet_id) if err != nil { - die("Error fetching tweet: " + err.Error(), false, -1) + die(fmt.Sprintf("Error fetching tweet: %s", err.Error()), false, -1) } log.Debug(tweet) err = profile.SaveTweet(tweet) if err != nil { - die("Error saving tweet: " + err.Error(), false, 4) + die(fmt.Sprintf("Error saving tweet: %s", err.Error()), false, 4) } happy_exit("Saved the tweet") } @@ -222,7 +222,6 @@ func fetch_user_feed(handle string, how_many int) { happy_exit(fmt.Sprintf("Saved %d tweets, %d retweets and %d users", len(trove.Tweets), len(trove.Retweets), len(trove.Users))) } - func download_tweet_content(tweet_identifier string) { tweet_id, err := extract_id_from(tweet_identifier) if err != nil { @@ -253,7 +252,7 @@ func download_user_content(handle scraper.UserHandle) { func search(query string) { trove, err := scraper.Search(query, 1000) if err != nil { - die("Error scraping search results: " + err.Error(), false, -100) + die(fmt.Sprintf("Error scraping search results: %s", err.Error()), false, -100) } profile.SaveTweetTrove(trove) diff --git a/persistence/media_download.go b/persistence/media_download.go index a69b6d6..8fdc595 100644 --- a/persistence/media_download.go +++ b/persistence/media_download.go @@ -1,21 +1,21 @@ package persistence import ( - "fmt" - "os" - "path" - "net/http" - "io/ioutil" - "strings" + "fmt" + "io/ioutil" + "net/http" + "os" + "path" + "strings" - "offline_twitter/scraper" + "offline_twitter/scraper" ) type MediaDownloader interface { - Curl(url string, outpath string) error + Curl(url string, outpath string) error } -type DefaultDownloader struct {} +type DefaultDownloader struct{} /** * Download a file over HTTP and save it. @@ -25,77 +25,75 @@ type DefaultDownloader struct {} * - outpath: the path on disk to save it to */ func (d DefaultDownloader) Curl(url string, outpath string) error { - println(url) - resp, err := http.Get(url) - if err != nil { - return err - } - if resp.StatusCode != 200 { - return fmt.Errorf("Error %s: %s", url, resp.Status) - } + println(url) + resp, err := http.Get(url) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("Error %s: %s", url, resp.Status) + } - data, err := ioutil.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("Error downloading image %s: %s", url, err.Error()) - } + data, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("Error downloading image %s: %s", url, err.Error()) + } - err = os.WriteFile(outpath, data, 0644) - if err != nil { - return fmt.Errorf("Error writing to path: %s, url: %s: %s", outpath, url, err.Error()) - } - return nil + err = os.WriteFile(outpath, data, 0644) + if err != nil { + return fmt.Errorf("Error writing to path: %s, url: %s: %s", outpath, url, err.Error()) + } + return nil } - /** * Downloads an Image, and if successful, marks it as downloaded in the DB */ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownloader) error { - outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) - err := downloader.Curl(img.RemoteURL, outfile) - if err != nil { - return err - } - img.IsDownloaded = true - return p.SaveImage(*img) + outfile := path.Join(p.ProfileDir, "images", img.LocalFilename) + err := downloader.Curl(img.RemoteURL, outfile) + if err != nil { + return err + } + img.IsDownloaded = true + return p.SaveImage(*img) } - /** * Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB */ func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error { - // Download the video - outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename) - err := downloader.Curl(v.RemoteURL, outfile) - if err != nil { - return err - } + // Download the video + outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename) + err := downloader.Curl(v.RemoteURL, outfile) + if err != nil { + return err + } - // Download the thumbnail - outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath) - err = downloader.Curl(v.ThumbnailRemoteUrl, outfile) - if err != nil { - return err - } + // Download the thumbnail + outfile = path.Join(p.ProfileDir, "video_thumbnails", v.ThumbnailLocalPath) + err = downloader.Curl(v.ThumbnailRemoteUrl, outfile) + if err != nil { + return err + } - v.IsDownloaded = true - return p.SaveVideo(*v) + v.IsDownloaded = true + return p.SaveVideo(*v) } /** * Downloads an URL thumbnail image, and if successful, marks it as downloaded in the DB */ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownloader) error { - if url.HasCard && url.HasThumbnail { - outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) - err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) - if err != nil { - return err - } - } - url.IsContentDownloaded = true - return p.SaveUrl(*url) + if url.HasCard && url.HasThumbnail { + outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath) + err := downloader.Curl(url.ThumbnailRemoteUrl, outfile) + if err != nil { + return err + } + } + url.IsContentDownloaded = true + return p.SaveUrl(*url) } /** @@ -104,90 +102,89 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl * Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader. */ func (p Profile) DownloadTweetContentFor(t *scraper.Tweet) error { - return p.DownloadTweetContentWithInjector(t, DefaultDownloader{}) + return p.DownloadTweetContentWithInjector(t, DefaultDownloader{}) } - /** * Enable injecting a custom MediaDownloader (i.e., for testing) */ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader MediaDownloader) error { - // Check if content needs to be downloaded; if not, just return - if !p.CheckTweetContentDownloadNeeded(*t) { - return nil - } + // Check if content needs to be downloaded; if not, just return + if !p.CheckTweetContentDownloadNeeded(*t) { + return nil + } - for i := range t.Images { - err := p.download_tweet_image(&t.Images[i], downloader) - if err != nil { - return err - } - } + for i := range t.Images { + err := p.download_tweet_image(&t.Images[i], downloader) + if err != nil { + return err + } + } - for i := range t.Videos { - err := p.download_tweet_video(&t.Videos[i], downloader) - if err != nil { - return err - } - } + for i := range t.Videos { + err := p.download_tweet_video(&t.Videos[i], downloader) + if err != nil { + return err + } + } - for i := range t.Urls { - err := p.download_link_thumbnail(&t.Urls[i], downloader) - if err != nil { - return err - } - } - t.IsContentDownloaded = true - return p.SaveTweet(*t) + for i := range t.Urls { + err := p.download_link_thumbnail(&t.Urls[i], downloader) + if err != nil { + return err + } + } + t.IsContentDownloaded = true + return p.SaveTweet(*t) } /** * Download a user's banner and profile images */ func (p Profile) DownloadUserContentFor(u *scraper.User) error { - return p.DownloadUserContentWithInjector(u, DefaultDownloader{}) + return p.DownloadUserContentWithInjector(u, DefaultDownloader{}) } /** * Enable injecting a custom MediaDownloader (i.e., for testing) */ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error { - if !p.CheckUserContentDownloadNeeded(*u) { - return nil - } + if !p.CheckUserContentDownloadNeeded(*u) { + return nil + } - var outfile string - var target_url string + var outfile string + var target_url string - if u.ProfileImageUrl == "" { - outfile = path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL)) - target_url = scraper.DEFAULT_PROFILE_IMAGE_URL - } else { - outfile = path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath) - target_url = u.ProfileImageUrl - } + if u.ProfileImageUrl == "" { + outfile = path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL)) + target_url = scraper.DEFAULT_PROFILE_IMAGE_URL + } else { + outfile = path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath) + target_url = u.ProfileImageUrl + } - err := downloader.Curl(target_url, outfile) - if err != nil { - return err - } + err := downloader.Curl(target_url, outfile) + if err != nil { + return err + } - // Skip it if there's no banner image - if u.BannerImageLocalPath != "" { - outfile = path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath) - err = downloader.Curl(u.BannerImageUrl, outfile) + // Skip it if there's no banner image + if u.BannerImageLocalPath != "" { + outfile = path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath) + err = downloader.Curl(u.BannerImageUrl, outfile) - if err != nil && strings.Contains(err.Error(), "404 Not Found") { - // Try adding "600x200". Not sure why this does this but sometimes it does. - err = downloader.Curl(u.BannerImageUrl + "/600x200", outfile) - } - if err != nil { - return err - } - } + if err != nil && strings.Contains(err.Error(), "404 Not Found") { + // Try adding "600x200". Not sure why this does this but sometimes it does. + err = downloader.Curl(u.BannerImageUrl+"/600x200", outfile) + } + if err != nil { + return err + } + } - u.IsContentDownloaded = true - return p.SaveUser(u) + u.IsContentDownloaded = true + return p.SaveUser(u) } /** @@ -196,16 +193,16 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med * If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method. */ func (p Profile) DownloadUserProfileImageTiny(u *scraper.User) error { - if p.IsFollowing(u.Handle) { - return p.DownloadUserContentFor(u) - } + if p.IsFollowing(u.Handle) { + return p.DownloadUserContentFor(u) + } - d := DefaultDownloader{} + d := DefaultDownloader{} - outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath()) - if file_exists(outfile) { - return nil - } - err := d.Curl(u.GetTinyProfileImageUrl(), outfile) - return err + outfile := path.Join(p.ProfileDir, "profile_images", u.GetTinyProfileImageLocalPath()) + if file_exists(outfile) { + return nil + } + err := d.Curl(u.GetTinyProfileImageUrl(), outfile) + return err } diff --git a/persistence/media_download_test.go b/persistence/media_download_test.go index f47ce02..2a9ba2f 100644 --- a/persistence/media_download_test.go +++ b/persistence/media_download_test.go @@ -1,95 +1,96 @@ package persistence_test import ( - "testing" + "testing" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - "offline_twitter/scraper" + "offline_twitter/scraper" ) -type FakeDownloader struct {} +type FakeDownloader struct{} + func (d FakeDownloader) Curl(url string, outpath string) error { return nil } func test_all_downloaded(tweet scraper.Tweet, yes_or_no bool, t *testing.T) { - error_msg := map[bool]string{ - true: "Expected to be downloaded, but it wasn't", - false: "Expected not to be downloaded, but it was", - }[yes_or_no] + error_msg := map[bool]string{ + true: "Expected to be downloaded, but it wasn't", + false: "Expected not to be downloaded, but it was", + }[yes_or_no] - assert.Len(t, tweet.Images, 2) - assert.Len(t, tweet.Videos, 1) - for _, img := range tweet.Images { - if img.IsDownloaded != yes_or_no { - t.Errorf("%s: ImageID %d", error_msg, img.ID) - } - } - for _, vid := range tweet.Videos { - if vid.IsDownloaded != yes_or_no { - t.Errorf("Expected not to be downloaded, but it was: VideoID %d", vid.ID) - } - } - if tweet.IsContentDownloaded != yes_or_no { - t.Errorf("%s: the tweet", error_msg) - } + assert.Len(t, tweet.Images, 2) + assert.Len(t, tweet.Videos, 1) + for _, img := range tweet.Images { + if img.IsDownloaded != yes_or_no { + t.Errorf("%s: ImageID %d", error_msg, img.ID) + } + } + for _, vid := range tweet.Videos { + if vid.IsDownloaded != yes_or_no { + t.Errorf("Expected not to be downloaded, but it was: VideoID %d", vid.ID) + } + } + if tweet.IsContentDownloaded != yes_or_no { + t.Errorf("%s: the tweet", error_msg) + } } /** * Downloading a Tweet's contents should mark the Tweet as downloaded */ func TestDownloadTweetContent(t *testing.T) { - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() + tweet := create_dummy_tweet() - // Persist the tweet - err := profile.SaveTweet(tweet) - require.NoError(t, err) + // Persist the tweet + err := profile.SaveTweet(tweet) + require.NoError(t, err) - // Make sure everything is marked "not downloaded" - test_all_downloaded(tweet, false, t) + // Make sure everything is marked "not downloaded" + test_all_downloaded(tweet, false, t) - // Do the (fake) downloading - err = profile.DownloadTweetContentWithInjector(&tweet, FakeDownloader{}) - require.NoError(t, err) + // Do the (fake) downloading + err = profile.DownloadTweetContentWithInjector(&tweet, FakeDownloader{}) + require.NoError(t, err) - // It should all be marked "yes downloaded" now - test_all_downloaded(tweet, true, t) + // It should all be marked "yes downloaded" now + test_all_downloaded(tweet, true, t) - // Reload the Tweet (check db); should also be "yes downloaded" - new_tweet, err := profile.GetTweetById(tweet.ID) - require.NoError(t, err) - test_all_downloaded(new_tweet, true, t) + // Reload the Tweet (check db); should also be "yes downloaded" + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(t, err) + test_all_downloaded(new_tweet, true, t) } /** * Downloading a User's contents should mark the User as downloaded */ func TestDownloadUserContent(t *testing.T) { - assert := assert.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + assert := assert.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - user := create_dummy_user() + user := create_dummy_user() - // Persist the User - err := profile.SaveUser(&user) - require.NoError(t, err) + // Persist the User + err := profile.SaveUser(&user) + require.NoError(t, err) - // Make sure the User is marked "not downloaded" - assert.False(user.IsContentDownloaded) + // Make sure the User is marked "not downloaded" + assert.False(user.IsContentDownloaded) - // Do the (fake) downloading - err = profile.DownloadUserContentWithInjector(&user, FakeDownloader{}) - require.NoError(t, err) + // Do the (fake) downloading + err = profile.DownloadUserContentWithInjector(&user, FakeDownloader{}) + require.NoError(t, err) - // The User should now be marked "yes downloaded" - assert.True(user.IsContentDownloaded) + // The User should now be marked "yes downloaded" + assert.True(user.IsContentDownloaded) - // Reload the User (check db); should also be "yes downloaded" - new_user, err := profile.GetUserByID(user.ID) - require.NoError(t, err) - assert.True(new_user.IsContentDownloaded) + // Reload the User (check db); should also be "yes downloaded" + new_user, err := profile.GetUserByID(user.ID) + require.NoError(t, err) + assert.True(new_user.IsContentDownloaded) } diff --git a/persistence/media_queries.go b/persistence/media_queries.go index 7f15731..2ef0fb1 100644 --- a/persistence/media_queries.go +++ b/persistence/media_queries.go @@ -1,9 +1,9 @@ package persistence import ( - "time" + "time" - "offline_twitter/scraper" + "offline_twitter/scraper" ) /** @@ -13,16 +13,16 @@ import ( * - img: the Image to save */ func (p Profile) SaveImage(img scraper.Image) error { - _, err := p.DB.Exec(` + _, err := p.DB.Exec(` insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded) values (?, ?, ?, ?, ?, ?, ?) on conflict do update set is_downloaded=(is_downloaded or ?) `, - img.ID, img.TweetID, img.Width, img.Height, img.RemoteURL, img.LocalFilename, img.IsDownloaded, - img.IsDownloaded, - ) - return err + img.ID, img.TweetID, img.Width, img.Height, img.RemoteURL, img.LocalFilename, img.IsDownloaded, + img.IsDownloaded, + ) + return err } /** @@ -32,7 +32,7 @@ func (p Profile) SaveImage(img scraper.Image) error { * - img: the Video to save */ func (p Profile) SaveVideo(vid scraper.Video) error { - _, err := p.DB.Exec(` + _, err := p.DB.Exec(` insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration, view_count, is_downloaded, is_gif) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) @@ -40,38 +40,38 @@ func (p Profile) SaveVideo(vid scraper.Video) error { set is_downloaded=(is_downloaded or ?), view_count=max(view_count, ?) `, - vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, - vid.Duration, vid.ViewCount, vid.IsDownloaded, vid.IsGif, + vid.ID, vid.TweetID, vid.Width, vid.Height, vid.RemoteURL, vid.LocalFilename, vid.ThumbnailRemoteUrl, vid.ThumbnailLocalPath, + vid.Duration, vid.ViewCount, vid.IsDownloaded, vid.IsGif, - vid.IsDownloaded, vid.ViewCount, - ) - return err + vid.IsDownloaded, vid.ViewCount, + ) + return err } /** * Save an Url */ func (p Profile) SaveUrl(url scraper.Url) error { - _, err := p.DB.Exec(` + _, err := p.DB.Exec(` insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict do update set is_content_downloaded=(is_content_downloaded or ?) `, - url.TweetID, url.Domain, url.Text, url.ShortText, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, - url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, + url.TweetID, url.Domain, url.Text, url.ShortText, url.Title, url.Description, url.CreatorID, url.SiteID, url.ThumbnailWidth, + url.ThumbnailHeight, url.ThumbnailRemoteUrl, url.ThumbnailLocalPath, url.HasCard, url.HasThumbnail, url.IsContentDownloaded, - url.IsContentDownloaded, - ) - return err + url.IsContentDownloaded, + ) + return err } /** * Save a Poll */ func (p Profile) SavePoll(poll scraper.Poll) error { - _, err := p.DB.Exec(` + _, err := p.DB.Exec(` insert into polls (id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) @@ -82,137 +82,135 @@ func (p Profile) SavePoll(poll scraper.Poll) error { choice4_votes=?, last_scraped_at=? `, - poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3, - poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(), + poll.ID, poll.TweetID, poll.NumChoices, poll.Choice1, poll.Choice1_Votes, poll.Choice2, poll.Choice2_Votes, poll.Choice3, + poll.Choice3_Votes, poll.Choice4, poll.Choice4_Votes, poll.VotingDuration, poll.VotingEndsAt.Unix(), poll.LastUpdatedAt.Unix(), - poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(), - ) - return err + poll.Choice1_Votes, poll.Choice2_Votes, poll.Choice3_Votes, poll.Choice4_Votes, poll.LastUpdatedAt.Unix(), + ) + return err } - /** * Get the list of images for a tweet */ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) { - stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?") - if err != nil { - return - } - defer stmt.Close() - rows, err := stmt.Query(t.ID) - if err != nil { - return - } - var img scraper.Image + stmt, err := p.DB.Prepare("select id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?") + if err != nil { + return + } + defer stmt.Close() + rows, err := stmt.Query(t.ID) + if err != nil { + return + } + var img scraper.Image - for rows.Next() { - err = rows.Scan(&img.ID, &img.Width, &img.Height, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded) - if err != nil { - return - } - img.TweetID = t.ID - imgs = append(imgs, img) - } - return + for rows.Next() { + err = rows.Scan(&img.ID, &img.Width, &img.Height, &img.RemoteURL, &img.LocalFilename, &img.IsDownloaded) + if err != nil { + return + } + img.TweetID = t.ID + imgs = append(imgs, img) + } + return } - /** * Get the list of videos for a tweet */ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) { - stmt, err := p.DB.Prepare(` + stmt, err := p.DB.Prepare(` select id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration, view_count, is_downloaded, is_gif from videos where tweet_id = ? `) - if err != nil { - return - } - defer stmt.Close() - rows, err := stmt.Query(t.ID) - if err != nil { - return - } - var vid scraper.Video - for rows.Next() { - err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, - &vid.ThumbnailLocalPath, &vid.Duration, &vid.ViewCount, &vid.IsDownloaded, &vid.IsGif) - if err != nil { - return - } - vid.TweetID = t.ID - vids = append(vids, vid) - } - return + if err != nil { + return + } + defer stmt.Close() + rows, err := stmt.Query(t.ID) + if err != nil { + return + } + var vid scraper.Video + for rows.Next() { + err = rows.Scan(&vid.ID, &vid.Width, &vid.Height, &vid.RemoteURL, &vid.LocalFilename, &vid.ThumbnailRemoteUrl, + &vid.ThumbnailLocalPath, &vid.Duration, &vid.ViewCount, &vid.IsDownloaded, &vid.IsGif) + if err != nil { + return + } + vid.TweetID = t.ID + vids = append(vids, vid) + } + return } /** * Get the list of Urls for a Tweet */ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) { - stmt, err := p.DB.Prepare(` + stmt, err := p.DB.Prepare(` select domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height, thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded from urls where tweet_id = ? order by rowid `) - if err != nil { - return - } - defer stmt.Close() - rows, err := stmt.Query(t.ID) - if err != nil { - return - } - var url scraper.Url - for rows.Next() { - err = rows.Scan(&url.Domain, &url.Text, &url.ShortText, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, - &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, - &url.HasThumbnail, &url.IsContentDownloaded) - if err != nil { - return - } - url.TweetID = t.ID - urls = append(urls, url) - } - return + if err != nil { + return + } + defer stmt.Close() + rows, err := stmt.Query(t.ID) + if err != nil { + return + } + var url scraper.Url + for rows.Next() { + err = rows.Scan(&url.Domain, &url.Text, &url.ShortText, &url.Title, &url.Description, &url.CreatorID, &url.SiteID, + &url.ThumbnailWidth, &url.ThumbnailHeight, &url.ThumbnailRemoteUrl, &url.ThumbnailLocalPath, &url.HasCard, + &url.HasThumbnail, &url.IsContentDownloaded) + if err != nil { + return + } + url.TweetID = t.ID + urls = append(urls, url) + } + return } /** * Get the list of Polls for a Tweet */ func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) { - stmt, err := p.DB.Prepare(` + stmt, err := p.DB.Prepare(` select id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes, voting_duration, voting_ends_at, last_scraped_at from polls where tweet_id = ? `) - if err != nil { - return - } - defer stmt.Close() - rows, err := stmt.Query(t.ID) - if err != nil { - return - } - var poll scraper.Poll - var voting_ends_at int - var last_scraped_at int - for rows.Next() { - err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3, - &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at) - if err != nil { - return - } - poll.TweetID = t.ID - poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0) - poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0) - polls = append(polls, poll) - } - return + if err != nil { + return + } + defer stmt.Close() + rows, err := stmt.Query(t.ID) + if err != nil { + return + } + var poll scraper.Poll + var voting_ends_at int + var last_scraped_at int + for rows.Next() { + err = rows.Scan(&poll.ID, &poll.NumChoices, &poll.Choice1, &poll.Choice1_Votes, &poll.Choice2, &poll.Choice2_Votes, &poll.Choice3, + &poll.Choice3_Votes, &poll.Choice4, &poll.Choice4_Votes, &poll.VotingDuration, &voting_ends_at, &last_scraped_at) + if err != nil { + return + } + poll.TweetID = t.ID + poll.VotingEndsAt = time.Unix(int64(voting_ends_at), 0) + poll.LastUpdatedAt = time.Unix(int64(last_scraped_at), 0) + polls = append(polls, poll) + } + return } diff --git a/persistence/media_queries_test.go b/persistence/media_queries_test.go index 28fc912..3cfba4a 100644 --- a/persistence/media_queries_test.go +++ b/persistence/media_queries_test.go @@ -2,281 +2,278 @@ package persistence_test import ( "testing" - "math/rand" - "time" - "github.com/go-test/deep" - "github.com/stretchr/testify/require" + "math/rand" + "time" - "offline_twitter/scraper" + "github.com/go-test/deep" + "github.com/stretchr/testify/require" + + "offline_twitter/scraper" ) - /** * Create an Image, save it, reload it, and make sure it comes back the same */ func TestSaveAndLoadImage(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() + tweet := create_stable_tweet() - // Create a fresh Image to test on - rand.Seed(time.Now().UnixNano()) - img := create_image_from_id(rand.Int()) - img.TweetID = tweet.ID + // Create a fresh Image to test on + rand.Seed(time.Now().UnixNano()) + img := create_image_from_id(rand.Int()) + img.TweetID = tweet.ID - // Save the Image - err := profile.SaveImage(img) - require.NoError(err) + // Save the Image + err := profile.SaveImage(img) + require.NoError(err) - // Reload the Image - imgs, err := profile.GetImagesForTweet(tweet) - require.NoError(err) + // Reload the Image + imgs, err := profile.GetImagesForTweet(tweet) + require.NoError(err) - var new_img scraper.Image - for index := range imgs { - if imgs[index].ID == img.ID { - new_img = imgs[index] - } - } - require.Equal(img.ID, new_img.ID, "Could not find image for some reason") - if diff := deep.Equal(img, new_img); diff != nil { - t.Error(diff) - } + var new_img scraper.Image + for index := range imgs { + if imgs[index].ID == img.ID { + new_img = imgs[index] + } + } + require.Equal(img.ID, new_img.ID, "Could not find image for some reason") + if diff := deep.Equal(img, new_img); diff != nil { + t.Error(diff) + } } /** * Change an Image, save the changes, reload it, and check if it comes back the same */ func TestModifyImage(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() - img := tweet.Images[0] + tweet := create_stable_tweet() + img := tweet.Images[0] - require.Equal(scraper.ImageID(-1), img.ID, "Got the wrong image back") + require.Equal(scraper.ImageID(-1), img.ID, "Got the wrong image back") - img.IsDownloaded = true + img.IsDownloaded = true - // Save the changes - err := profile.SaveImage(img) - require.NoError(err) + // Save the changes + err := profile.SaveImage(img) + require.NoError(err) - // Reload it - imgs, err := profile.GetImagesForTweet(tweet) - require.NoError(err) + // Reload it + imgs, err := profile.GetImagesForTweet(tweet) + require.NoError(err) - new_img := imgs[0] - require.Equal(imgs[0], new_img, "Got the wrong image back") + new_img := imgs[0] + require.Equal(imgs[0], new_img, "Got the wrong image back") - if diff := deep.Equal(img, new_img); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(img, new_img); diff != nil { + t.Error(diff) + } } - /** * Create an Video, save it, reload it, and make sure it comes back the same */ func TestSaveAndLoadVideo(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() + tweet := create_stable_tweet() - // Create a fresh Video to test on - rand.Seed(time.Now().UnixNano()) - vid := create_video_from_id(rand.Int()) - vid.TweetID = tweet.ID - vid.IsGif = true; + // Create a fresh Video to test on + rand.Seed(time.Now().UnixNano()) + vid := create_video_from_id(rand.Int()) + vid.TweetID = tweet.ID + vid.IsGif = true - // Save the Video - err := profile.SaveVideo(vid) - require.NoError(err) + // Save the Video + err := profile.SaveVideo(vid) + require.NoError(err) - // Reload the Video - vids, err := profile.GetVideosForTweet(tweet) - require.NoError(err) + // Reload the Video + vids, err := profile.GetVideosForTweet(tweet) + require.NoError(err) - var new_vid scraper.Video - for index := range vids { - if vids[index].ID == vid.ID { - new_vid = vids[index] - } - } - require.Equal(vid.ID, new_vid.ID, "Could not find video for some reason") + var new_vid scraper.Video + for index := range vids { + if vids[index].ID == vid.ID { + new_vid = vids[index] + } + } + require.Equal(vid.ID, new_vid.ID, "Could not find video for some reason") - if diff := deep.Equal(vid, new_vid); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(vid, new_vid); diff != nil { + t.Error(diff) + } } /** * Change an Video, save the changes, reload it, and check if it comes back the same */ func TestModifyVideo(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() - vid := tweet.Videos[0] - require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back") + tweet := create_stable_tweet() + vid := tweet.Videos[0] + require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back") - vid.IsDownloaded = true - vid.ViewCount = 23000 + vid.IsDownloaded = true + vid.ViewCount = 23000 - // Save the changes - err := profile.SaveVideo(vid) - require.NoError(err) + // Save the changes + err := profile.SaveVideo(vid) + require.NoError(err) - // Reload it - vids, err := profile.GetVideosForTweet(tweet) - require.NoError(err) + // Reload it + vids, err := profile.GetVideosForTweet(tweet) + require.NoError(err) - new_vid := vids[0] - require.Equal(vid.ID, new_vid.ID, "Got the wrong video back") + new_vid := vids[0] + require.Equal(vid.ID, new_vid.ID, "Got the wrong video back") - if diff := deep.Equal(vid, new_vid); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(vid, new_vid); diff != nil { + t.Error(diff) + } } - /** * Create an Url, save it, reload it, and make sure it comes back the same */ func TestSaveAndLoadUrl(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() + tweet := create_stable_tweet() - // Create a fresh Url to test on - rand.Seed(time.Now().UnixNano()) - url := create_url_from_id(rand.Int()) - url.TweetID = tweet.ID + // Create a fresh Url to test on + rand.Seed(time.Now().UnixNano()) + url := create_url_from_id(rand.Int()) + url.TweetID = tweet.ID - // Save the Url - err := profile.SaveUrl(url) - require.NoError(err) + // Save the Url + err := profile.SaveUrl(url) + require.NoError(err) - // Reload the Url - urls, err := profile.GetUrlsForTweet(tweet) - require.NoError(err) + // Reload the Url + urls, err := profile.GetUrlsForTweet(tweet) + require.NoError(err) - var new_url scraper.Url - for index := range urls { - if urls[index].Text == url.Text { - new_url = urls[index] - } - } - require.Equal(url.Text, new_url.Text, "Could not find the url for some reason") + var new_url scraper.Url + for index := range urls { + if urls[index].Text == url.Text { + new_url = urls[index] + } + } + require.Equal(url.Text, new_url.Text, "Could not find the url for some reason") - if diff := deep.Equal(url, new_url); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(url, new_url); diff != nil { + t.Error(diff) + } } /** * Change an Url, save the changes, reload it, and check if it comes back the same */ func TestModifyUrl(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() - url := tweet.Urls[0] + tweet := create_stable_tweet() + url := tweet.Urls[0] - require.Equal("-1text", url.Text, "Got the wrong url back") + require.Equal("-1text", url.Text, "Got the wrong url back") - url.IsContentDownloaded = true + url.IsContentDownloaded = true - // Save the changes - err := profile.SaveUrl(url) - require.NoError(err) + // Save the changes + err := profile.SaveUrl(url) + require.NoError(err) - // Reload it - urls, err := profile.GetUrlsForTweet(tweet) - require.NoError(err) + // Reload it + urls, err := profile.GetUrlsForTweet(tweet) + require.NoError(err) - new_url := urls[0] - require.Equal("-1text", url.Text, "Got the wrong url back") + new_url := urls[0] + require.Equal("-1text", url.Text, "Got the wrong url back") - if diff := deep.Equal(url, new_url); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(url, new_url); diff != nil { + t.Error(diff) + } } - /** * Create a Poll, save it, reload it, and make sure it comes back the same */ func TestSaveAndLoadPoll(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() + tweet := create_stable_tweet() - poll := create_poll_from_id(rand.Int()) - poll.TweetID = tweet.ID + poll := create_poll_from_id(rand.Int()) + poll.TweetID = tweet.ID - // Save the Poll - err := profile.SavePoll(poll) - require.NoError(err) + // Save the Poll + err := profile.SavePoll(poll) + require.NoError(err) - // Reload the Poll - polls, err := profile.GetPollsForTweet(tweet) - require.NoError(err) + // Reload the Poll + polls, err := profile.GetPollsForTweet(tweet) + require.NoError(err) - var new_poll scraper.Poll - for index := range polls { - if polls[index].ID == poll.ID { - new_poll = polls[index] - } - } - require.Equal(poll.ID, new_poll.ID, "Could not find poll for some reason") + var new_poll scraper.Poll + for index := range polls { + if polls[index].ID == poll.ID { + new_poll = polls[index] + } + } + require.Equal(poll.ID, new_poll.ID, "Could not find poll for some reason") - if diff := deep.Equal(poll, new_poll); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(poll, new_poll); diff != nil { + t.Error(diff) + } } /** * Change an Poll, save the changes, reload it, and check if it comes back the same */ func TestModifyPoll(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestMediaQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestMediaQueries" + profile := create_or_load_profile(profile_path) - tweet := create_stable_tweet() - poll := tweet.Polls[0] + tweet := create_stable_tweet() + poll := tweet.Polls[0] - require.Equal("-1", poll.Choice1, "Got the wrong Poll back") + require.Equal("-1", poll.Choice1, "Got the wrong Poll back") - poll.Choice1_Votes = 1200 // Increment it by 200 votes + poll.Choice1_Votes = 1200 // Increment it by 200 votes - // Save the changes - err := profile.SavePoll(poll) - require.NoError(err) + // Save the changes + err := profile.SavePoll(poll) + require.NoError(err) - // Reload it - polls, err := profile.GetPollsForTweet(tweet) - require.NoError(err) + // Reload it + polls, err := profile.GetPollsForTweet(tweet) + require.NoError(err) - new_poll := polls[0] - require.Equal("-1", new_poll.Choice1, "Got the wrong poll back") + new_poll := polls[0] + require.Equal("-1", new_poll.Choice1, "Got the wrong poll back") - if diff := deep.Equal(poll, new_poll); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(poll, new_poll); diff != nil { + t.Error(diff) + } } diff --git a/persistence/profile.go b/persistence/profile.go index aa8bf48..1650de2 100644 --- a/persistence/profile.go +++ b/persistence/profile.go @@ -13,12 +13,12 @@ import ( //go:embed schema.sql var sql_init string -type Settings struct {} +type Settings struct{} type Profile struct { ProfileDir string - Settings Settings - DB *sql.DB + Settings Settings + DB *sql.DB } /** @@ -27,11 +27,11 @@ type Profile struct { type ErrTargetAlreadyExists struct { target string } + func (err ErrTargetAlreadyExists) Error() string { return fmt.Sprintf("Target already exists: %s", err.target) } - /** * Create a new profile in the given location. * Fails if target location already exists (i.e., is a file or directory). @@ -124,7 +124,6 @@ func NewProfile(target_dir string) (Profile, error) { return Profile{target_dir, settings, db}, nil } - /** * Loads the profile at the given location. Fails if the given directory is not a Profile. * @@ -139,9 +138,9 @@ func LoadProfile(profile_dir string) (Profile, error) { sqlite_file := path.Join(profile_dir, "twitter.db") for _, file := range []string{ - settings_file, - sqlite_file, - } { + settings_file, + sqlite_file, + } { if !file_exists(file) { return Profile{}, fmt.Errorf("Invalid profile, could not find file: %s", file) } @@ -157,15 +156,15 @@ func LoadProfile(profile_dir string) (Profile, error) { return Profile{}, err } - db, err := sql.Open("sqlite3", sqlite_file + "?_foreign_keys=on&_journal_mode=WAL") + db, err := sql.Open("sqlite3", sqlite_file+"?_foreign_keys=on&_journal_mode=WAL") if err != nil { return Profile{}, err } ret := Profile{ ProfileDir: profile_dir, - Settings: settings, - DB: db, + Settings: settings, + DB: db, } err = ret.check_and_update_version() diff --git a/persistence/profile_test.go b/persistence/profile_test.go index 727d919..d8aab7d 100644 --- a/persistence/profile_test.go +++ b/persistence/profile_test.go @@ -2,8 +2,9 @@ package persistence_test import ( "testing" - "os" + "errors" + "os" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -23,7 +24,6 @@ func file_exists(path string) bool { } } - /** * Should refuse to create a Profile if the target already exists (i.e., is a file or directory). */ @@ -44,7 +44,6 @@ func TestNewProfileInvalidPath(t *testing.T) { assert.True(t, is_right_type, "Expected 'ErrTargetAlreadyExists' error, got %T instead", err) } - /** * Should correctly create a new Profile */ @@ -61,7 +60,7 @@ func TestNewProfile(t *testing.T) { profile, err := persistence.NewProfile(profile_path) require.NoError(err) - assert.Equal(profile_path,profile.ProfileDir) + assert.Equal(profile_path, profile.ProfileDir) // Check files were created contents, err := os.ReadDir(profile_path) @@ -70,8 +69,8 @@ func TestNewProfile(t *testing.T) { expected_files := []struct { filename string - isDir bool - } { + isDir bool + }{ {"images", true}, {"link_preview_images", true}, {"profile_images", true}, @@ -92,7 +91,6 @@ func TestNewProfile(t *testing.T) { assert.Equal(persistence.ENGINE_DATABASE_VERSION, version) } - /** * Should correctly load the Profile */ diff --git a/persistence/retweet_queries.go b/persistence/retweet_queries.go index 1799102..b849631 100644 --- a/persistence/retweet_queries.go +++ b/persistence/retweet_queries.go @@ -20,7 +20,6 @@ func (p Profile) SaveRetweet(r scraper.Retweet) error { return err } - /** * Retrieve a Retweet by ID */ diff --git a/persistence/retweet_queries_test.go b/persistence/retweet_queries_test.go index 9b3bfb3..236dc97 100644 --- a/persistence/retweet_queries_test.go +++ b/persistence/retweet_queries_test.go @@ -8,7 +8,6 @@ import ( "github.com/go-test/deep" ) - func TestSaveAndLoadRetweet(t *testing.T) { require := require.New(t) diff --git a/persistence/tweet_queries.go b/persistence/tweet_queries.go index d558a33..afdd721 100644 --- a/persistence/tweet_queries.go +++ b/persistence/tweet_queries.go @@ -1,21 +1,21 @@ package persistence import ( - "time" - "strings" - "database/sql" + "database/sql" + "strings" + "time" - "offline_twitter/scraper" + "offline_twitter/scraper" ) func (p Profile) SaveTweet(t scraper.Tweet) error { - db := p.DB + db := p.DB - tx, err := db.Begin() - if err != nil { - return err - } - _, err = db.Exec(` + tx, err := db.Begin() + if err != nil { + return err + } + _, err = db.Exec(` insert into tweets (id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, tombstone_type, is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at) @@ -30,74 +30,74 @@ func (p Profile) SaveTweet(t scraper.Tweet) error { is_conversation_scraped=(is_conversation_scraped or ?), last_scraped_at=max(last_scraped_at, ?) `, - t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, - t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), - strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(), + t.ID, t.UserID, t.Text, t.PostedAt.Unix(), t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.InReplyToID, + t.QuotedTweetID, scraper.JoinArrayOfHandles(t.Mentions), scraper.JoinArrayOfHandles(t.ReplyMentions), + strings.Join(t.Hashtags, ","), t.TombstoneType, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, t.LastScrapedAt.Unix(), - t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, - t.LastScrapedAt.Unix(), - ) + t.NumLikes, t.NumRetweets, t.NumReplies, t.NumQuoteTweets, t.IsStub, t.IsContentDownloaded, t.IsConversationScraped, + t.LastScrapedAt.Unix(), + ) - if err != nil { - return err - } - for _, url := range t.Urls { - err := p.SaveUrl(url) - if err != nil { - return err - } - } - for _, image := range t.Images { - err := p.SaveImage(image) - if err != nil { - return err - } - } - for _, video := range t.Videos { - err := p.SaveVideo(video) - if err != nil { - return err - } - } - for _, hashtag := range t.Hashtags { - _, err := db.Exec("insert into hashtags (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, hashtag) - if err != nil { - return err - } - } - for _, poll := range t.Polls { - err := p.SavePoll(poll) - if err != nil { - return err - } - } + if err != nil { + return err + } + for _, url := range t.Urls { + err := p.SaveUrl(url) + if err != nil { + return err + } + } + for _, image := range t.Images { + err := p.SaveImage(image) + if err != nil { + return err + } + } + for _, video := range t.Videos { + err := p.SaveVideo(video) + if err != nil { + return err + } + } + for _, hashtag := range t.Hashtags { + _, err := db.Exec("insert into hashtags (tweet_id, text) values (?, ?) on conflict do nothing", t.ID, hashtag) + if err != nil { + return err + } + } + for _, poll := range t.Polls { + err := p.SavePoll(poll) + if err != nil { + return err + } + } - err = tx.Commit() - if err != nil { - return err - } - return nil + err = tx.Commit() + if err != nil { + return err + } + return nil } func (p Profile) IsTweetInDatabase(id scraper.TweetID) bool { - db := p.DB + db := p.DB - var dummy string - err := db.QueryRow("select 1 from tweets where id = ?", id).Scan(&dummy) - if err != nil { - if err != sql.ErrNoRows { - // A real error - panic(err) - } - return false - } - return true + var dummy string + err := db.QueryRow("select 1 from tweets where id = ?", id).Scan(&dummy) + if err != nil { + if err != sql.ErrNoRows { + // A real error + panic(err) + } + return false + } + return true } func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { - db := p.DB + db := p.DB - stmt, err := db.Prepare(` + stmt, err := db.Prepare(` select id, user_id, text, posted_at, num_likes, num_retweets, num_replies, num_quote_tweets, in_reply_to_id, quoted_tweet_id, mentions, reply_mentions, hashtags, ifnull(tombstone_types.short_name, ""), is_stub, is_content_downloaded, is_conversation_scraped, last_scraped_at @@ -105,104 +105,103 @@ func (p Profile) GetTweetById(id scraper.TweetID) (scraper.Tweet, error) { where id = ? `) - if err != nil { - return scraper.Tweet{}, err - } - defer stmt.Close() + if err != nil { + return scraper.Tweet{}, err + } + defer stmt.Close() - var t scraper.Tweet - var postedAt int - var last_scraped_at int - var mentions string - var reply_mentions string - var hashtags string + var t scraper.Tweet + var postedAt int + var last_scraped_at int + var mentions string + var reply_mentions string + var hashtags string - row := stmt.QueryRow(id) - err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, - &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, - &t.IsConversationScraped, &last_scraped_at) - if err != nil { - return t, err - } + row := stmt.QueryRow(id) + err = row.Scan(&t.ID, &t.UserID, &t.Text, &postedAt, &t.NumLikes, &t.NumRetweets, &t.NumReplies, &t.NumQuoteTweets, &t.InReplyToID, + &t.QuotedTweetID, &mentions, &reply_mentions, &hashtags, &t.TombstoneType, &t.IsStub, &t.IsContentDownloaded, + &t.IsConversationScraped, &last_scraped_at) + if err != nil { + return t, err + } - t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds` - t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0) + t.PostedAt = time.Unix(int64(postedAt), 0) // args are `seconds` and `nanoseconds` + t.LastScrapedAt = time.Unix(int64(last_scraped_at), 0) - t.Mentions = []scraper.UserHandle{} - for _, m := range strings.Split(mentions, ",") { - if m != "" { - t.Mentions = append(t.Mentions, scraper.UserHandle(m)) - } - } - t.ReplyMentions = []scraper.UserHandle{} - for _, m := range strings.Split(reply_mentions, ",") { - if m != "" { - t.ReplyMentions = append(t.ReplyMentions, scraper.UserHandle(m)) - } - } - t.Hashtags = []string{} - for _, h := range strings.Split(hashtags, ",") { - if h != "" { - t.Hashtags = append(t.Hashtags, h) - } - } + t.Mentions = []scraper.UserHandle{} + for _, m := range strings.Split(mentions, ",") { + if m != "" { + t.Mentions = append(t.Mentions, scraper.UserHandle(m)) + } + } + t.ReplyMentions = []scraper.UserHandle{} + for _, m := range strings.Split(reply_mentions, ",") { + if m != "" { + t.ReplyMentions = append(t.ReplyMentions, scraper.UserHandle(m)) + } + } + t.Hashtags = []string{} + for _, h := range strings.Split(hashtags, ",") { + if h != "" { + t.Hashtags = append(t.Hashtags, h) + } + } - imgs, err := p.GetImagesForTweet(t) - if err != nil { - return t, err - } - t.Images = imgs + imgs, err := p.GetImagesForTweet(t) + if err != nil { + return t, err + } + t.Images = imgs - vids, err := p.GetVideosForTweet(t) - if err != nil { - return t, err - } - t.Videos = vids + vids, err := p.GetVideosForTweet(t) + if err != nil { + return t, err + } + t.Videos = vids - polls, err := p.GetPollsForTweet(t) - if err != nil { - return t, err - } - t.Polls = polls + polls, err := p.GetPollsForTweet(t) + if err != nil { + return t, err + } + t.Polls = polls - urls, err := p.GetUrlsForTweet(t) - t.Urls = urls + urls, err := p.GetUrlsForTweet(t) + t.Urls = urls - return t, err + return t, err } - /** * Populate the `User` field on a tweet with an actual User */ func (p Profile) LoadUserFor(t *scraper.Tweet) error { - if t.User != nil { - // Already there, no need to load it - return nil - } + if t.User != nil { + // Already there, no need to load it + return nil + } - user, err := p.GetUserByID(t.UserID) - if err != nil { - return err - } - t.User = &user - return nil + user, err := p.GetUserByID(t.UserID) + if err != nil { + return err + } + t.User = &user + return nil } /** * Return `false` if the tweet is in the DB and has had its content downloaded, `false` otherwise */ func (p Profile) CheckTweetContentDownloadNeeded(tweet scraper.Tweet) bool { - row := p.DB.QueryRow(`select is_content_downloaded from tweets where id = ?`, tweet.ID) + row := p.DB.QueryRow(`select is_content_downloaded from tweets where id = ?`, tweet.ID) - var is_content_downloaded bool - err := row.Scan(&is_content_downloaded) - if err != nil { - if err == sql.ErrNoRows { - return true - } else { - panic(err) - } - } - return !is_content_downloaded + var is_content_downloaded bool + err := row.Scan(&is_content_downloaded) + if err != nil { + if err == sql.ErrNoRows { + return true + } else { + panic(err) + } + } + return !is_content_downloaded } diff --git a/persistence/tweet_queries_test.go b/persistence/tweet_queries_test.go index acc2f29..82e1873 100644 --- a/persistence/tweet_queries_test.go +++ b/persistence/tweet_queries_test.go @@ -1,59 +1,58 @@ package persistence_test import ( - "testing" - "time" + "testing" + "time" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" - "github.com/go-test/deep" + "github.com/go-test/deep" ) - /** * Create a Tweet, save it, reload it, and make sure it comes back the same */ func TestSaveAndLoadTweet(t *testing.T) { - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() - tweet.IsContentDownloaded = true + tweet := create_dummy_tweet() + tweet.IsContentDownloaded = true - // Save the tweet - err := profile.SaveTweet(tweet) - require.NoError(t, err) + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(t, err) - // Reload the tweet - new_tweet, err := profile.GetTweetById(tweet.ID) - require.NoError(t, err) + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(t, err) - if diff := deep.Equal(tweet, new_tweet); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(tweet, new_tweet); diff != nil { + t.Error(diff) + } } /** * Same as above, but with a tombstone */ func TestSaveAndLoadTombstone(t *testing.T) { - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tombstone() + tweet := create_dummy_tombstone() - // Save the tweet - err := profile.SaveTweet(tweet) - require.NoError(t, err) + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(t, err) - // Reload the tweet - new_tweet, err := profile.GetTweetById(tweet.ID) - require.NoError(t, err) + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(t, err) - if diff := deep.Equal(tweet, new_tweet); diff != nil { - t.Error(diff) - } + if diff := deep.Equal(tweet, new_tweet); diff != nil { + t.Error(diff) + } } /** @@ -65,152 +64,152 @@ func TestSaveAndLoadTombstone(t *testing.T) { * - is_content_downloaded should only go from "no" to "yes" */ func TestNoWorseningTweet(t *testing.T) { - assert := assert.New(t) - require := require.New(t) + assert := assert.New(t) + require := require.New(t) - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() - tweet.IsContentDownloaded = true - tweet.IsStub = false - tweet.IsConversationScraped = true - tweet.LastScrapedAt = time.Unix(1000, 0) + tweet := create_dummy_tweet() + tweet.IsContentDownloaded = true + tweet.IsStub = false + tweet.IsConversationScraped = true + tweet.LastScrapedAt = time.Unix(1000, 0) - // Save the tweet - err := profile.SaveTweet(tweet) - require.NoError(err) + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(err) - // Worsen the tweet and re-save it - tweet.IsContentDownloaded = false - tweet.IsStub = true - tweet.IsConversationScraped = false - tweet.LastScrapedAt = time.Unix(500, 0) - err = profile.SaveTweet(tweet) - require.NoError(err) + // Worsen the tweet and re-save it + tweet.IsContentDownloaded = false + tweet.IsStub = true + tweet.IsConversationScraped = false + tweet.LastScrapedAt = time.Unix(500, 0) + err = profile.SaveTweet(tweet) + require.NoError(err) - // Reload the tweet - new_tweet, err := profile.GetTweetById(tweet.ID) - require.NoError(err) + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(err) - assert.False(new_tweet.IsStub, "Should have preserved non-stub status") - assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status") - assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") - assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") + assert.False(new_tweet.IsStub, "Should have preserved non-stub status") + assert.True(new_tweet.IsContentDownloaded, "Should have preserved is-content-downloaded status") + assert.True(new_tweet.IsConversationScraped, "Should have preserved is-conversation-scraped status") + assert.Equal(int64(1000), new_tweet.LastScrapedAt.Unix(), "Should have preserved last-scraped-at time") } func TestModifyTweet(t *testing.T) { - assert := assert.New(t) - require := require.New(t) + assert := assert.New(t) + require := require.New(t) - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() - tweet.NumLikes = 1000 - tweet.NumRetweets = 2000 - tweet.NumReplies = 3000 - tweet.NumQuoteTweets = 4000 - tweet.IsStub = true - tweet.IsContentDownloaded = false - tweet.IsConversationScraped = false - tweet.LastScrapedAt = time.Unix(1000, 0) + tweet := create_dummy_tweet() + tweet.NumLikes = 1000 + tweet.NumRetweets = 2000 + tweet.NumReplies = 3000 + tweet.NumQuoteTweets = 4000 + tweet.IsStub = true + tweet.IsContentDownloaded = false + tweet.IsConversationScraped = false + tweet.LastScrapedAt = time.Unix(1000, 0) - err := profile.SaveTweet(tweet) - require.NoError(err) + err := profile.SaveTweet(tweet) + require.NoError(err) - tweet.NumLikes = 1500 - tweet.NumRetweets = 2500 - tweet.NumReplies = 3500 - tweet.NumQuoteTweets = 4500 - tweet.IsStub = false - tweet.IsContentDownloaded = true - tweet.IsConversationScraped = true - tweet.LastScrapedAt = time.Unix(2000, 0) + tweet.NumLikes = 1500 + tweet.NumRetweets = 2500 + tweet.NumReplies = 3500 + tweet.NumQuoteTweets = 4500 + tweet.IsStub = false + tweet.IsContentDownloaded = true + tweet.IsConversationScraped = true + tweet.LastScrapedAt = time.Unix(2000, 0) - err = profile.SaveTweet(tweet) - require.NoError(err) + err = profile.SaveTweet(tweet) + require.NoError(err) - // Reload the tweet - new_tweet, err := profile.GetTweetById(tweet.ID) - require.NoError(err) + // Reload the tweet + new_tweet, err := profile.GetTweetById(tweet.ID) + require.NoError(err) - assert.Equal(1500, new_tweet.NumLikes) - assert.Equal(2500, new_tweet.NumRetweets) - assert.Equal(3500, new_tweet.NumReplies) - assert.Equal(4500, new_tweet.NumQuoteTweets) - assert.False(new_tweet.IsStub) - assert.True(new_tweet.IsContentDownloaded) - assert.True(new_tweet.IsConversationScraped) - assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix()) + assert.Equal(1500, new_tweet.NumLikes) + assert.Equal(2500, new_tweet.NumRetweets) + assert.Equal(3500, new_tweet.NumReplies) + assert.Equal(4500, new_tweet.NumQuoteTweets) + assert.False(new_tweet.IsStub) + assert.True(new_tweet.IsContentDownloaded) + assert.True(new_tweet.IsConversationScraped) + assert.Equal(int64(2000), new_tweet.LastScrapedAt.Unix()) } /** * Should correctly report whether the User exists in the database */ func TestIsTweetInDatabase(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() + tweet := create_dummy_tweet() - exists := profile.IsTweetInDatabase(tweet.ID) - require.False(exists) + exists := profile.IsTweetInDatabase(tweet.ID) + require.False(exists) - err := profile.SaveTweet(tweet) - require.NoError(err) + err := profile.SaveTweet(tweet) + require.NoError(err) - exists = profile.IsTweetInDatabase(tweet.ID) - assert.True(t, exists) + exists = profile.IsTweetInDatabase(tweet.ID) + assert.True(t, exists) } /** * Should correctly populate the `User` field on a Tweet */ func TestLoadUserForTweet(t *testing.T) { - require := require.New(t) - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + require := require.New(t) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() + tweet := create_dummy_tweet() - // Save the tweet - err := profile.SaveTweet(tweet) - require.NoError(err) - require.Nil(tweet.User, "`User` field is already there for some reason") + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(err) + require.Nil(tweet.User, "`User` field is already there for some reason") - err = profile.LoadUserFor(&tweet) - require.NoError(err) - require.NotNil(tweet.User, "Did not load a user. It is still nil.") + err = profile.LoadUserFor(&tweet) + require.NoError(err) + require.NotNil(tweet.User, "Did not load a user. It is still nil.") } /** * Test all the combinations for whether a tweet needs its content downloaded */ func TestCheckTweetContentDownloadNeeded(t *testing.T) { - assert := assert.New(t) - profile_path := "test_profiles/TestTweetQueries" - profile := create_or_load_profile(profile_path) + assert := assert.New(t) + profile_path := "test_profiles/TestTweetQueries" + profile := create_or_load_profile(profile_path) - tweet := create_dummy_tweet() - tweet.IsContentDownloaded = false + tweet := create_dummy_tweet() + tweet.IsContentDownloaded = false - // Non-saved tweets should need to be downloaded - assert.True(profile.CheckTweetContentDownloadNeeded(tweet)) + // Non-saved tweets should need to be downloaded + assert.True(profile.CheckTweetContentDownloadNeeded(tweet)) - // Save the tweet - err := profile.SaveTweet(tweet) - require.NoError(t, err) + // Save the tweet + err := profile.SaveTweet(tweet) + require.NoError(t, err) - // Should still need a download since `is_content_downloaded` is false - assert.True(profile.CheckTweetContentDownloadNeeded(tweet)) + // Should still need a download since `is_content_downloaded` is false + assert.True(profile.CheckTweetContentDownloadNeeded(tweet)) - // Try again but this time with `is_content_downloaded` = true - tweet.IsContentDownloaded = true - err = profile.SaveTweet(tweet) - require.NoError(t, err) + // Try again but this time with `is_content_downloaded` = true + tweet.IsContentDownloaded = true + err = profile.SaveTweet(tweet) + require.NoError(t, err) - // Should no longer need a download - assert.False(profile.CheckTweetContentDownloadNeeded(tweet)) + // Should no longer need a download + assert.False(profile.CheckTweetContentDownloadNeeded(tweet)) } diff --git a/persistence/user_queries.go b/persistence/user_queries.go index 0364b8e..0fa0d61 100644 --- a/persistence/user_queries.go +++ b/persistence/user_queries.go @@ -15,21 +15,21 @@ import ( * - u: the User */ func (p Profile) SaveUser(u *scraper.User) error { - if u.IsNeedingFakeID { - err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID) - if err == sql.ErrNoRows { - // We need to continue-- create a new fake user - u.ID = p.NextFakeUserID() - } else if err == nil { - // We're done; everything is fine (ID has already been scanned into the User) - return nil - } else { - // A real error occurred - panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error())) - } - } + if u.IsNeedingFakeID { + err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID) + if err == sql.ErrNoRows { + // We need to continue-- create a new fake user + u.ID = p.NextFakeUserID() + } else if err == nil { + // We're done; everything is fine (ID has already been scanned into the User) + return nil + } else { + // A real error occurred + panic(fmt.Sprintf("Error checking for existence of fake user with handle %q: %s", u.Handle, err.Error())) + } + } - _, err := p.DB.Exec(` + _, err := p.DB.Exec(` insert into users (id, display_name, handle, bio, following_count, followers_count, location, website, join_date, is_private, is_verified, is_banned, profile_image_url, profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_content_downloaded, is_id_fake) diff --git a/scraper/test_responses/api_v2/tweet_with_poll.json b/scraper/test_responses/api_v2/tweet_with_poll.json index 0f64ab2..dd88a42 100644 --- a/scraper/test_responses/api_v2/tweet_with_poll.json +++ b/scraper/test_responses/api_v2/tweet_with_poll.json @@ -1,390 +1 @@ -{ - "result": - { - "__typename": "Tweet", - "rest_id": "1485692111106285571", - "core": - { - "user_results": - { - "result": - { - "__typename": "User", - "id": "VXNlcjo0NDA2NzI5OA==", - "rest_id": "44067298", - "affiliates_highlighted_label": - {}, - "has_nft_avatar": false, - "legacy": - { - "created_at": "Tue Jun 02 05:35:52 +0000 2009", - "default_profile": false, - "default_profile_image": false, - "description": "Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model", - "entities": - { - "description": - { - "urls": - [] - }, - "url": - { - "urls": - [ - { - "display_url": "amzn.to/3oInafv", - "expanded_url": "https://amzn.to/3oInafv", - "url": "https://t.co/7VDFOOtFK2", - "indices": - [ - 0, - 23 - ] - } - ] - } - }, - "fast_followers_count": 0, - "favourites_count": 3840, - "followers_count": 334571, - "friends_count": 964, - "has_custom_timelines": false, - "is_translator": false, - "listed_count": 1434, - "location": "Austin", - "media_count": 9504, - "name": "Michael Malice", - "normal_followers_count": 334571, - "pinned_tweet_ids_str": - [ - "1477347403023982596" - ], - "profile_banner_extensions": - { - "mediaColor": - { - "r": - { - "ok": - { - "palette": - [ - { - "percentage": 60.59, - "rgb": - { - "blue": 0, - "green": 0, - "red": 0 - } - }, - { - "percentage": 18.77, - "rgb": - { - "blue": 64, - "green": 60, - "red": 156 - } - }, - { - "percentage": 3.62, - "rgb": - { - "blue": 31, - "green": 29, - "red": 77 - } - }, - { - "percentage": 3.22, - "rgb": - { - "blue": 215, - "green": 199, - "red": 138 - } - }, - { - "percentage": 2.83, - "rgb": - { - "blue": 85, - "green": 79, - "red": 215 - } - } - ] - } - } - } - }, - "profile_banner_url": "https://pbs.twimg.com/profile_banners/44067298/1615134676", - "profile_image_extensions": - { - "mediaColor": - { - "r": - { - "ok": - { - "palette": - [ - { - "percentage": 50.78, - "rgb": - { - "blue": 249, - "green": 247, - "red": 246 - } - }, - { - "percentage": 17.4, - "rgb": - { - "blue": 51, - "green": 51, - "red": 205 - } - }, - { - "percentage": 9.43, - "rgb": - { - "blue": 124, - "green": 139, - "red": 210 - } - }, - { - "percentage": 6.38, - "rgb": - { - "blue": 47, - "green": 63, - "red": 116 - } - }, - { - "percentage": 3.17, - "rgb": - { - "blue": 65, - "green": 45, - "red": 46 - } - } - ] - } - } - } - }, - "profile_image_url_https": "https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg", - "profile_interstitial_type": "", - "protected": false, - "screen_name": "michaelmalice", - "statuses_count": 138682, - "translator_type": "none", - "url": "https://t.co/7VDFOOtFK2", - "verified": true, - "withheld_in_countries": - [] - }, - "super_follow_eligible": false, - "super_followed_by": false, - "super_following": false - } - } - }, - "card": - { - "rest_id": "card://1485692110472892424", - "legacy": - { - "binding_values": - [ - { - "key": "choice1_label", - "value": - { - "string_value": "1", - "type": "STRING" - } - }, - { - "key": "choice2_label", - "value": - { - "string_value": "2", - "type": "STRING" - } - }, - { - "key": "end_datetime_utc", - "value": - { - "string_value": "2022-01-25T19:12:56Z", - "type": "STRING" - } - }, - { - "key": "counts_are_final", - "value": - { - "boolean_value": false, - "type": "BOOLEAN" - } - }, - { - "key": "choice2_count", - "value": - { - "string_value": "702", - "type": "STRING" - } - }, - { - "key": "choice1_count", - "value": - { - "string_value": "891", - "type": "STRING" - } - }, - { - "key": "choice4_label", - "value": - { - "string_value": "E", - "type": "STRING" - } - }, - { - "key": "last_updated_datetime_utc", - "value": - { - "string_value": "2022-01-24T20:20:38Z", - "type": "STRING" - } - }, - { - "key": "duration_minutes", - "value": - { - "string_value": "1440", - "type": "STRING" - } - }, - { - "key": "choice3_count", - "value": - { - "string_value": "459", - "type": "STRING" - } - }, - { - "key": "choice4_count", - "value": - { - "string_value": "1801", - "type": "STRING" - } - }, - { - "key": "choice3_label", - "value": - { - "string_value": "C", - "type": "STRING" - } - }, - { - "key": "api", - "value": - { - "string_value": "capi://passthrough/1", - "type": "STRING" - } - }, - { - "key": "card_url", - "value": - { - "scribe_key": "card_url", - "string_value": "https://twitter.com", - "type": "STRING" - } - } - ], - "card_platform": - { - "platform": - { - "audience": - { - "name": "production" - }, - "device": - { - "name": "Swift", - "version": "12" - } - } - }, - "name": "poll4choice_text_only", - "url": "card://1485692110472892424", - "user_refs": - [] - } - }, - "legacy": - { - "created_at": "Mon Jan 24 19:12:56 +0000 2022", - "conversation_control": - { - "policy": "Community", - "conversation_owner": - { - "legacy": - { - "screen_name": "michaelmalice" - } - } - }, - "conversation_id_str": "1485692111106285571", - "display_text_range": - [ - 0, - 158 - ], - "entities": - { - "user_mentions": - [], - "urls": - [], - "hashtags": - [], - "symbols": - [] - }, - "favorite_count": 71, - "favorited": false, - "full_text": "Which of these would most make you feel a disconnect from someone else?\n\n1) They don't like music\n2) They don't like pets\nC) They don't read\nE) They are vegan", - "is_quote_status": false, - "lang": "en", - "possibly_sensitive": false, - "possibly_sensitive_editable": true, - "quote_count": 12, - "reply_count": 11, - "retweet_count": 16, - "retweeted": false, - "source": "Twitter Web App", - "user_id_str": "44067298", - "id_str": "1485692111106285571" - } - } -} +{"result":{"__typename":"Tweet","rest_id":"1485692111106285571","core":{"user_results":{"result":{"__typename":"User","id":"VXNlcjo0NDA2NzI5OA==","rest_id":"44067298","affiliates_highlighted_label":{},"has_nft_avatar":false,"legacy":{"created_at":"Tue Jun 02 05:35:52 +0000 2009","default_profile":false,"default_profile_image":false,"description":"Author of Dear Reader, The New Right & The Anarchist Handbook\nHost of \"YOUR WELCOME\" \nSubject of Ego & Hubris by Harvey Pekar\nHe/Him ⚑\n@SheathUnderwear Model","entities":{"description":{"urls":[]},"url":{"urls":[{"display_url":"amzn.to/3oInafv","expanded_url":"https://amzn.to/3oInafv","url":"https://t.co/7VDFOOtFK2","indices":[0,23]}]}},"fast_followers_count":0,"favourites_count":3840,"followers_count":334571,"friends_count":964,"has_custom_timelines":false,"is_translator":false,"listed_count":1434,"location":"Austin","media_count":9504,"name":"Michael Malice","normal_followers_count":334571,"pinned_tweet_ids_str":["1477347403023982596"],"profile_banner_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":60.59,"rgb":{"blue":0,"green":0,"red":0}},{"percentage":18.77,"rgb":{"blue":64,"green":60,"red":156}},{"percentage":3.62,"rgb":{"blue":31,"green":29,"red":77}},{"percentage":3.22,"rgb":{"blue":215,"green":199,"red":138}},{"percentage":2.83,"rgb":{"blue":85,"green":79,"red":215}}]}}}},"profile_banner_url":"https://pbs.twimg.com/profile_banners/44067298/1615134676","profile_image_extensions":{"mediaColor":{"r":{"ok":{"palette":[{"percentage":50.78,"rgb":{"blue":249,"green":247,"red":246}},{"percentage":17.4,"rgb":{"blue":51,"green":51,"red":205}},{"percentage":9.43,"rgb":{"blue":124,"green":139,"red":210}},{"percentage":6.38,"rgb":{"blue":47,"green":63,"red":116}},{"percentage":3.17,"rgb":{"blue":65,"green":45,"red":46}}]}}}},"profile_image_url_https":"https://pbs.twimg.com/profile_images/1415820415314931715/_VVX4GI8_normal.jpg","profile_interstitial_type":"","protected":false,"screen_name":"michaelmalice","statuses_count":138682,"translator_type":"none","url":"https://t.co/7VDFOOtFK2","verified":true,"withheld_in_countries":[]},"super_follow_eligible":false,"super_followed_by":false,"super_following":false}}},"card":{"rest_id":"card://1485692110472892424","legacy":{"binding_values":[{"key":"choice1_label","value":{"string_value":"1","type":"STRING"}},{"key":"choice2_label","value":{"string_value":"2","type":"STRING"}},{"key":"end_datetime_utc","value":{"string_value":"2022-01-25T19:12:56Z","type":"STRING"}},{"key":"counts_are_final","value":{"boolean_value":false,"type":"BOOLEAN"}},{"key":"choice2_count","value":{"string_value":"702","type":"STRING"}},{"key":"choice1_count","value":{"string_value":"891","type":"STRING"}},{"key":"choice4_label","value":{"string_value":"E","type":"STRING"}},{"key":"last_updated_datetime_utc","value":{"string_value":"2022-01-24T20:20:38Z","type":"STRING"}},{"key":"duration_minutes","value":{"string_value":"1440","type":"STRING"}},{"key":"choice3_count","value":{"string_value":"459","type":"STRING"}},{"key":"choice4_count","value":{"string_value":"1801","type":"STRING"}},{"key":"choice3_label","value":{"string_value":"C","type":"STRING"}},{"key":"api","value":{"string_value":"capi://passthrough/1","type":"STRING"}},{"key":"card_url","value":{"scribe_key":"card_url","string_value":"https://twitter.com","type":"STRING"}}],"card_platform":{"platform":{"audience":{"name":"production"},"device":{"name":"Swift","version":"12"}}},"name":"poll4choice_text_only","url":"card://1485692110472892424","user_refs":[]}},"legacy":{"created_at":"Mon Jan 24 19:12:56 +0000 2022","conversation_control":{"policy":"Community","conversation_owner":{"legacy":{"screen_name":"michaelmalice"}}},"conversation_id_str":"1485692111106285571","display_text_range":[0,158],"entities":{"user_mentions":[],"urls":[],"hashtags":[],"symbols":[]},"favorite_count":71,"favorited":false,"full_text":"Which of these would most make you feel a disconnect from someone else?\n\n1) They don't like music\n2) They don't like pets\nC) They don't read\nE) They are vegan","is_quote_status":false,"lang":"en","possibly_sensitive":false,"possibly_sensitive_editable":true,"quote_count":12,"reply_count":11,"retweet_count":16,"retweeted":false,"source":"Twitter Web App","user_id_str":"44067298","id_str":"1485692111106285571"}}} diff --git a/scraper/test_responses/single_tweets/tweet_with_html_entities.json b/scraper/test_responses/single_tweets/tweet_with_html_entities.json index 43ecb6d..1602bb4 100644 --- a/scraper/test_responses/single_tweets/tweet_with_html_entities.json +++ b/scraper/test_responses/single_tweets/tweet_with_html_entities.json @@ -1,20 +1 @@ -{ - "created_at": "Thu Dec 23 20:55:48 +0000 2021", - "id_str": "1474121585510563845", - "full_text": "By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets", - "display_text_range": - [ - 0, - 288 - ], - "entities": - {}, - "source": "Twitter Web App", - "user_id_str": "1239676915386068993", - "retweet_count": 239, - "favorite_count": 1118, - "reply_count": 26, - "quote_count": 26, - "conversation_id_str": "1474121585510563845", - "lang": "en" -} +{"created_at":"Thu Dec 23 20:55:48 +0000 2021","id_str":"1474121585510563845","full_text":"By the 1970s the elite consensus was that \"the hunt for atomic spies\" had been a grotesque over-reaction to minor leaks that cost the lives of the Rosenbergs & ruined many innocents. Only when the USSR fell was it discovered that they & other spies had given away ALL the secrets","display_text_range":[0,288],"entities":{},"source":"Twitter Web App","user_id_str":"1239676915386068993","retweet_count":239,"favorite_count":1118,"reply_count":26,"quote_count":26,"conversation_id_str":"1474121585510563845","lang":"en"} diff --git a/scraper/test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json b/scraper/test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json index 7e97470..507c19b 100644 --- a/scraper/test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json +++ b/scraper/test_responses/single_tweets/tweet_with_quoted_tweet_and_url.json @@ -1,367 +1 @@ -{ - "created_at": "Sun Feb 27 18:28:26 +0000 2022", - "id_str": "1498002100294410241", - "full_text": "This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the weasels and rats running American industry today?\n\nhttps://t.co/xurv3zslRt", - "display_text_range": - [ - 0, - 185 - ], - "entities": - { - "urls": - [ - { - "url": "https://t.co/xurv3zslRt", - "expanded_url": "https://youtu.be/VjrlTMvirVo", - "display_url": "youtu.be/VjrlTMvirVo", - "indices": - [ - 162, - 185 - ] - } - ] - }, - "source": "Twitter for iPhone", - "user_id_str": "2996959115", - "is_quote_status": true, - "quoted_status_id_str": "1497997890999898115", - "quoted_status_permalink": - { - "url": "https://t.co/6KrfeQvaAa", - "expanded": "https://twitter.com/LatinxPutler/status/1497997890999898115", - "display": "twitter.com/LatinxPutler/s…" - }, - "retweet_count": 4, - "favorite_count": 42, - "reply_count": 2, - "quote_count": 0, - "conversation_id_str": "1498002100294410241", - "possibly_sensitive_editable": true, - "card": - { - "name": "player", - "url": "https://t.co/xurv3zslRt", - "card_type_url": "http://card-type-url-is-deprecated.invalid", - "binding_values": - { - "vanity_url": - { - "type": "STRING", - "string_value": "youtube.com", - "scribe_key": "vanity_url" - }, - "player_url": - { - "type": "STRING", - "string_value": "https://www.youtube.com/embed/VjrlTMvirVo" - }, - "app_is_free": - { - "type": "STRING", - "string_value": "true" - }, - "app_price_currency": - { - "type": "STRING", - "string_value": "USD" - }, - "app_price_amount": - { - "type": "STRING", - "string_value": "0.0" - }, - "domain": - { - "type": "STRING", - "string_value": "www.youtube.com" - }, - "app_num_ratings": - { - "type": "STRING", - "string_value": "24,612,724" - }, - "app_star_rating": - { - "type": "STRING", - "string_value": "4.68268" - }, - "app_name": - { - "type": "STRING", - "string_value": "YouTube: Watch, Listen, Stream" - }, - "player_width": - { - "type": "STRING", - "string_value": "480" - }, - "player_height": - { - "type": "STRING", - "string_value": "360" - }, - "site": - { - "type": "USER", - "user_value": - { - "id_str": "10228272", - "path": - [] - }, - "scribe_key": "publisher_id" - }, - "title": - { - "type": "STRING", - "string_value": "Vladimir Putin Rage" - }, - "description": - { - "type": "STRING", - "string_value": "Putin owns business men" - }, - "player_image_small": - { - "type": "IMAGE", - "image_value": - { - "url": "https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=144x144", - "width": 144, - "height": 108 - } - }, - "player_image": - { - "type": "IMAGE", - "image_value": - { - "url": "https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=280x280", - "width": 280, - "height": 210 - } - }, - "player_image_large": - { - "type": "IMAGE", - "image_value": - { - "url": "https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=1200x627", - "width": 1200, - "height": 627 - } - }, - "player_image_x_large": - { - "type": "IMAGE", - "image_value": - { - "url": "https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=png&name=2048x2048_2_exp", - "width": 480, - "height": 360 - } - }, - "player_image_color": - { - "type": "IMAGE_COLOR", - "image_color_value": - { - "palette": - [ - { - "percentage": 32.24, - "rgb": - { - "red": 0, - "green": 0, - "blue": 0 - } - }, - { - "percentage": 26.64, - "rgb": - { - "red": 244, - "green": 238, - "blue": 183 - } - }, - { - "percentage": 14.46, - "rgb": - { - "red": 109, - "green": 132, - "blue": 150 - } - }, - { - "percentage": 10.73, - "rgb": - { - "red": 176, - "green": 21, - "blue": 27 - } - }, - { - "percentage": 4.41, - "rgb": - { - "red": 195, - "green": 154, - "blue": 133 - } - } - ] - } - }, - "player_image_original": - { - "type": "IMAGE", - "image_value": - { - "url": "https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=orig", - "width": 480, - "height": 360 - } - }, - "card_url": - { - "type": "STRING", - "string_value": "https://t.co/xurv3zslRt", - "scribe_key": "card_url" - } - }, - "users": - { - "10228272": - { - "id_str": "10228272", - "name": "YouTube", - "screen_name": "YouTube", - "location": "San Bruno, CA", - "description": "like and subscribe.", - "url": "https://t.co/bUisN3Gqbw", - "entities": - { - "url": - { - "urls": - [ - { - "url": "https://t.co/bUisN3Gqbw", - "expanded_url": "http://youtube.com", - "display_url": "youtube.com", - "indices": - [ - 0, - 23 - ] - } - ] - }, - "description": - {} - }, - "followers_count": 74506164, - "fast_followers_count": 0, - "normal_followers_count": 74506164, - "friends_count": 1207, - "listed_count": 79844, - "created_at": "Tue Nov 13 21:43:46 +0000 2007", - "favourites_count": 6062, - "verified": true, - "statuses_count": 42888, - "media_count": 13946, - "is_translation_enabled": true, - "profile_image_url_https": "https://pbs.twimg.com/profile_images/1427292844612595720/RC1YSvuT_normal.jpg", - "profile_banner_url": "https://pbs.twimg.com/profile_banners/10228272/1644426960", - "profile_image_extensions_media_availability": null, - "profile_image_extensions_alt_text": null, - "profile_image_extensions": - { - "mediaStats": - { - "r": - { - "missing": null - }, - "ttl": -1 - } - }, - "profile_banner_extensions_media_availability": null, - "profile_banner_extensions_alt_text": null, - "profile_banner_extensions": - { - "mediaStats": - { - "r": - { - "missing": null - }, - "ttl": -1 - } - }, - "profile_link_color": "C9191D", - "pinned_tweet_ids": - [], - "pinned_tweet_ids_str": - [], - "has_custom_timelines": true, - "advertiser_account_type": "promotable_user", - "advertiser_account_service_levels": - [ - "dso", - "dso", - "dso", - "dso", - "dso", - "dso", - "dso", - "dso", - "dso" - ], - "profile_interstitial_type": "", - "business_profile_state": "none", - "translator_type": "regular", - "withheld_in_countries": - [], - "ext": - { - "highlightedLabel": - { - "r": - { - "ok": - {} - }, - "ttl": -1 - } - } - } - }, - "card_platform": - { - "platform": - { - "device": - { - "name": "Swift", - "version": "12" - }, - "audience": - { - "name": "production" - } - } - } - }, - "lang": "en", - "self_thread": - { - "id_str": "1498002100294410241" - } -} +{"created_at":"Sun Feb 27 18:28:26 +0000 2022","id_str":"1498002100294410241","full_text":"This is video he’s talking about. Please watch. Is there a single US politician capable of doing this with the weasels and rats running American industry today?\n\nhttps://t.co/xurv3zslRt","display_text_range":[0,185],"entities":{"urls":[{"url":"https://t.co/xurv3zslRt","expanded_url":"https://youtu.be/VjrlTMvirVo","display_url":"youtu.be/VjrlTMvirVo","indices":[162,185]}]},"source":"Twitter for iPhone","user_id_str":"2996959115","is_quote_status":true,"quoted_status_id_str":"1497997890999898115","quoted_status_permalink":{"url":"https://t.co/6KrfeQvaAa","expanded":"https://twitter.com/LatinxPutler/status/1497997890999898115","display":"twitter.com/LatinxPutler/s…"},"retweet_count":4,"favorite_count":42,"reply_count":2,"quote_count":0,"conversation_id_str":"1498002100294410241","possibly_sensitive_editable":true,"card":{"name":"player","url":"https://t.co/xurv3zslRt","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"vanity_url":{"type":"STRING","string_value":"youtube.com","scribe_key":"vanity_url"},"player_url":{"type":"STRING","string_value":"https://www.youtube.com/embed/VjrlTMvirVo"},"app_is_free":{"type":"STRING","string_value":"true"},"app_price_currency":{"type":"STRING","string_value":"USD"},"app_price_amount":{"type":"STRING","string_value":"0.0"},"domain":{"type":"STRING","string_value":"www.youtube.com"},"app_num_ratings":{"type":"STRING","string_value":"24,612,724"},"app_star_rating":{"type":"STRING","string_value":"4.68268"},"app_name":{"type":"STRING","string_value":"YouTube: Watch, Listen, Stream"},"player_width":{"type":"STRING","string_value":"480"},"player_height":{"type":"STRING","string_value":"360"},"site":{"type":"USER","user_value":{"id_str":"10228272","path":[]},"scribe_key":"publisher_id"},"title":{"type":"STRING","string_value":"Vladimir Putin Rage"},"description":{"type":"STRING","string_value":"Putin owns business men"},"player_image_small":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=144x144","width":144,"height":108}},"player_image":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=280x280","width":280,"height":210}},"player_image_large":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=1200x627","width":1200,"height":627}},"player_image_x_large":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=png&name=2048x2048_2_exp","width":480,"height":360}},"player_image_color":{"type":"IMAGE_COLOR","image_color_value":{"palette":[{"percentage":32.24,"rgb":{"red":0,"green":0,"blue":0}},{"percentage":26.64,"rgb":{"red":244,"green":238,"blue":183}},{"percentage":14.46,"rgb":{"red":109,"green":132,"blue":150}},{"percentage":10.73,"rgb":{"red":176,"green":21,"blue":27}},{"percentage":4.41,"rgb":{"red":195,"green":154,"blue":133}}]}},"player_image_original":{"type":"IMAGE","image_value":{"url":"https://pbs.twimg.com/card_img/1497796040619175936/M03IKQ-W?format=jpg&name=orig","width":480,"height":360}},"card_url":{"type":"STRING","string_value":"https://t.co/xurv3zslRt","scribe_key":"card_url"}},"users":{"10228272":{"id_str":"10228272","name":"YouTube","screen_name":"YouTube","location":"San Bruno, CA","description":"like and subscribe.","url":"https://t.co/bUisN3Gqbw","entities":{"url":{"urls":[{"url":"https://t.co/bUisN3Gqbw","expanded_url":"http://youtube.com","display_url":"youtube.com","indices":[0,23]}]},"description":{}},"followers_count":74506164,"fast_followers_count":0,"normal_followers_count":74506164,"friends_count":1207,"listed_count":79844,"created_at":"Tue Nov 13 21:43:46 +0000 2007","favourites_count":6062,"verified":true,"statuses_count":42888,"media_count":13946,"is_translation_enabled":true,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1427292844612595720/RC1YSvuT_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/10228272/1644426960","profile_image_extensions_media_availability":null,"profile_image_extensions_alt_text":null,"profile_image_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_banner_extensions_media_availability":null,"profile_banner_extensions_alt_text":null,"profile_banner_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_link_color":"C9191D","pinned_tweet_ids":[],"pinned_tweet_ids_str":[],"has_custom_timelines":true,"advertiser_account_type":"promotable_user","advertiser_account_service_levels":["dso","dso","dso","dso","dso","dso","dso","dso","dso"],"profile_interstitial_type":"","business_profile_state":"none","translator_type":"regular","withheld_in_countries":[],"ext":{"highlightedLabel":{"r":{"ok":{}},"ttl":-1}}}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}},"lang":"en","self_thread":{"id_str":"1498002100294410241"}} diff --git a/scraper/test_responses/tombstones/tombstone_deleted.json b/scraper/test_responses/tombstones/tombstone_deleted.json index 096bb4f..be414eb 100644 --- a/scraper/test_responses/tombstones/tombstone_deleted.json +++ b/scraper/test_responses/tombstones/tombstone_deleted.json @@ -1,331 +1 @@ -{ - "globalObjects": - { - "tweets": - { - "1454524255127887878": - { - "created_at": "Sat Oct 30 19:03:00 +0000 2021", - "id_str": "1454524255127887878", - "full_text": "@TastefulTyrant Halloween is often the easiest night of the year but women do thirst trap, too.", - "display_text_range": - [ - 16, - 95 - ], - "entities": - { - "user_mentions": - [ - { - "screen_name": "TastefulTyrant", - "name": "ᴛᴀꜱᴛᴇꜰᴜʟ ᴛʏʀᴀɴᴛ", - "id_str": "1218687933391298560", - "indices": - [ - 0, - 15 - ] - } - ] - }, - "source": "Twitter Web App", - "in_reply_to_status_id_str": "1454521654781136902", - "in_reply_to_user_id_str": "1218687933391298560", - "in_reply_to_screen_name": "TastefulTyrant", - "user_id_str": "887434912529338375", - "retweet_count": 0, - "favorite_count": 12, - "reply_count": 0, - "quote_count": 0, - "conversation_id_str": "1454521654781136902", - "lang": "en" - } - }, - "users": - { - "887434912529338375": - { - "id_str": "887434912529338375", - "name": "Covfefe Anon", - "screen_name": "CovfefeAnon", - "location": "", - "description": "Not to be confused with 2001 Nobel Peace Prize winner Kofi Annan.\n\n54th Clause of the Magna Carta absolutist.\n\nCommentary from an NRx perspective.", - "entities": - { - "description": - {} - }, - "followers_count": 8386, - "fast_followers_count": 0, - "normal_followers_count": 8386, - "friends_count": 497, - "listed_count": 59, - "created_at": "Tue Jul 18 22:12:25 +0000 2017", - "favourites_count": 175661, - "statuses_count": 26334, - "media_count": 1755, - "profile_image_url_https": "https://pbs.twimg.com/profile_images/1392509603116617731/TDrNeUiZ_normal.jpg", - "profile_banner_url": "https://pbs.twimg.com/profile_banners/887434912529338375/1598514714", - "profile_image_extensions_alt_text": null, - "profile_image_extensions_media_color": - { - "palette": - [ - { - "rgb": - { - "red": 127, - "green": 125, - "blue": 102 - }, - "percentage": 34.13 - }, - { - "rgb": - { - "red": 68, - "green": 50, - "blue": 44 - }, - "percentage": 26.45 - }, - { - "rgb": - { - "red": 167, - "green": 170, - "blue": 176 - }, - "percentage": 12.16 - }, - { - "rgb": - { - "red": 102, - "green": 47, - "blue": 31 - }, - "percentage": 6.4 - }, - { - "rgb": - { - "red": 43, - "green": 52, - "blue": 65 - }, - "percentage": 3.54 - } - ] - }, - "profile_image_extensions_media_availability": null, - "profile_image_extensions": - { - "mediaStats": - { - "r": - { - "missing": null - }, - "ttl": -1 - } - }, - "profile_banner_extensions_alt_text": null, - "profile_banner_extensions_media_availability": null, - "profile_banner_extensions_media_color": - { - "palette": - [ - { - "rgb": - { - "red": 254, - "green": 254, - "blue": 254 - }, - "percentage": 44.66 - }, - { - "rgb": - { - "red": 122, - "green": 116, - "blue": 123 - }, - "percentage": 24.0 - }, - { - "rgb": - { - "red": 131, - "green": 164, - "blue": 104 - }, - "percentage": 18.44 - }, - { - "rgb": - { - "red": 50, - "green": 50, - "blue": 50 - }, - "percentage": 6.56 - }, - { - "rgb": - { - "red": 114, - "green": 156, - "blue": 99 - }, - "percentage": 2.85 - } - ] - }, - "profile_banner_extensions": - { - "mediaStats": - { - "r": - { - "missing": null - }, - "ttl": -1 - } - }, - "profile_link_color": "1B95E0", - "pinned_tweet_ids": - [ - 1005906691324596224 - ], - "pinned_tweet_ids_str": - [ - "1005906691324596224" - ], - "advertiser_account_type": "promotable_user", - "advertiser_account_service_levels": - [ - "analytics" - ], - "profile_interstitial_type": "", - "business_profile_state": "none", - "translator_type": "none", - "withheld_in_countries": - [], - "ext": - { - "highlightedLabel": - { - "r": - { - "ok": - {} - }, - "ttl": -1 - } - } - } - }, - "moments": - {}, - "cards": - {}, - "places": - {}, - "media": - {}, - "broadcasts": - {}, - "topics": - {}, - "lists": - {} - }, - "timeline": - { - "id": "Conversation-1454521654781136902", - "instructions": - [ - { - "addEntries": - { - "entries": - [ - { - "entryId": "tombstone-7768850382073638905", - "sortIndex": "7768850382073638905", - "content": - { - "item": - { - "content": - { - "tombstone": - { - "displayType": "Inline", - "tombstoneInfo": - { - "text": "", - "richText": - { - "text": "This Tweet was deleted by the Tweet author. Learn more", - "entities": - [ - { - "fromIndex": 44, - "toIndex": 54, - "ref": - { - "url": - { - "urlType": "ExternalUrl", - "url": "https://help.twitter.com/rules-and-policies/notices-on-twitter" - } - } - } - ], - "rtl": false - } - } - } - } - } - } - }, - { - "entryId": "tweet-1454524255127887878", - "sortIndex": "7768847781726887929", - "content": - { - "item": - { - "content": - { - "tweet": - { - "id": "1454524255127887878", - "displayType": "Tweet" - } - } - } - } - } - ] - } - }, - { - "terminateTimeline": - { - "direction": "Top" - } - } - ], - "responseObjects": - { - "feedbackActions": - {} - } - } -} +{"globalObjects":{"tweets":{"1454524255127887878":{"created_at":"Sat Oct 30 19:03:00 +0000 2021","id_str":"1454524255127887878","full_text":"@TastefulTyrant Halloween is often the easiest night of the year but women do thirst trap, too.","display_text_range":[16,95],"entities":{"user_mentions":[{"screen_name":"TastefulTyrant","name":"ᴛᴀꜱᴛᴇꜰᴜʟ ᴛʏʀᴀɴᴛ","id_str":"1218687933391298560","indices":[0,15]}]},"source":"Twitter Web App","in_reply_to_status_id_str":"1454521654781136902","in_reply_to_user_id_str":"1218687933391298560","in_reply_to_screen_name":"TastefulTyrant","user_id_str":"887434912529338375","retweet_count":0,"favorite_count":12,"reply_count":0,"quote_count":0,"conversation_id_str":"1454521654781136902","lang":"en"}},"users":{"887434912529338375":{"id_str":"887434912529338375","name":"Covfefe Anon","screen_name":"CovfefeAnon","location":"","description":"Not to be confused with 2001 Nobel Peace Prize winner Kofi Annan.\n\n54th Clause of the Magna Carta absolutist.\n\nCommentary from an NRx perspective.","entities":{"description":{}},"followers_count":8386,"fast_followers_count":0,"normal_followers_count":8386,"friends_count":497,"listed_count":59,"created_at":"Tue Jul 18 22:12:25 +0000 2017","favourites_count":175661,"statuses_count":26334,"media_count":1755,"profile_image_url_https":"https://pbs.twimg.com/profile_images/1392509603116617731/TDrNeUiZ_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/887434912529338375/1598514714","profile_image_extensions_alt_text":null,"profile_image_extensions_media_color":{"palette":[{"rgb":{"red":127,"green":125,"blue":102},"percentage":34.13},{"rgb":{"red":68,"green":50,"blue":44},"percentage":26.45},{"rgb":{"red":167,"green":170,"blue":176},"percentage":12.16},{"rgb":{"red":102,"green":47,"blue":31},"percentage":6.4},{"rgb":{"red":43,"green":52,"blue":65},"percentage":3.54}]},"profile_image_extensions_media_availability":null,"profile_image_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_banner_extensions_alt_text":null,"profile_banner_extensions_media_availability":null,"profile_banner_extensions_media_color":{"palette":[{"rgb":{"red":254,"green":254,"blue":254},"percentage":44.66},{"rgb":{"red":122,"green":116,"blue":123},"percentage":24.0},{"rgb":{"red":131,"green":164,"blue":104},"percentage":18.44},{"rgb":{"red":50,"green":50,"blue":50},"percentage":6.56},{"rgb":{"red":114,"green":156,"blue":99},"percentage":2.85}]},"profile_banner_extensions":{"mediaStats":{"r":{"missing":null},"ttl":-1}},"profile_link_color":"1B95E0","pinned_tweet_ids":[1005906691324596224],"pinned_tweet_ids_str":["1005906691324596224"],"advertiser_account_type":"promotable_user","advertiser_account_service_levels":["analytics"],"profile_interstitial_type":"","business_profile_state":"none","translator_type":"none","withheld_in_countries":[],"ext":{"highlightedLabel":{"r":{"ok":{}},"ttl":-1}}}},"moments":{},"cards":{},"places":{},"media":{},"broadcasts":{},"topics":{},"lists":{}},"timeline":{"id":"Conversation-1454521654781136902","instructions":[{"addEntries":{"entries":[{"entryId":"tombstone-7768850382073638905","sortIndex":"7768850382073638905","content":{"item":{"content":{"tombstone":{"displayType":"Inline","tombstoneInfo":{"text":"","richText":{"text":"This Tweet was deleted by the Tweet author. Learn more","entities":[{"fromIndex":44,"toIndex":54,"ref":{"url":{"urlType":"ExternalUrl","url":"https://help.twitter.com/rules-and-policies/notices-on-twitter"}}}],"rtl":false}}}}}}},{"entryId":"tweet-1454524255127887878","sortIndex":"7768847781726887929","content":{"item":{"content":{"tweet":{"id":"1454524255127887878","displayType":"Tweet"}}}}}]}},{"terminateTimeline":{"direction":"Top"}}],"responseObjects":{"feedbackActions":{}}}} diff --git a/terminal_utils/colors.go b/terminal_utils/colors.go index 8bda0c1..569a3ce 100644 --- a/terminal_utils/colors.go +++ b/terminal_utils/colors.go @@ -1,15 +1,17 @@ package terminal_utils - /** * Colors for terminal output */ -const COLOR_RESET = "\033[0m" -const COLOR_RED = "\033[31m" -const COLOR_GREEN = "\033[32m" -const COLOR_YELLOW = "\033[33m" -const COLOR_BLUE = "\033[34m" -const COLOR_PURPLE = "\033[35m" -const COLOR_CYAN = "\033[36m" -const COLOR_GRAY = "\033[37m" -const COLOR_WHITE = "\033[97m" + +const ( + COLOR_RESET = "\033[0m" + COLOR_RED = "\033[31m" + COLOR_GREEN = "\033[32m" + COLOR_YELLOW = "\033[33m" + COLOR_BLUE = "\033[34m" + COLOR_PURPLE = "\033[35m" + COLOR_CYAN = "\033[36m" + COLOR_GRAY = "\033[37m" + COLOR_WHITE = "\033[97m" +) diff --git a/terminal_utils/formatting.go b/terminal_utils/formatting.go index 0f1a99b..9006211 100644 --- a/terminal_utils/formatting.go +++ b/terminal_utils/formatting.go @@ -1,46 +1,44 @@ package terminal_utils import ( - "time" - "strings" + "strings" + "time" ) /** * Format a timestamp in human-readable form. */ func FormatDate(t time.Time) string { - return t.Format("Jan 2, 2006 15:04:05") + return t.Format("Jan 2, 2006 15:04:05") } - /** * Wrap lines to fixed width, while respecting word breaks */ func WrapParagraph(paragraph string, width int) []string { - var lines []string - i := 0 - for i < len(paragraph) - width { - // Find a word break at the end of the line to avoid splitting up words - end := i + width - for end > i && paragraph[end] != ' ' { // Look for a space, starting at the end - end -= 1 - } - lines = append(lines, paragraph[i:end]) - i = end + 1 - } - lines = append(lines, paragraph[i:]) - return lines + var lines []string + i := 0 + for i < len(paragraph)-width { + // Find a word break at the end of the line to avoid splitting up words + end := i + width + for end > i && paragraph[end] != ' ' { // Look for a space, starting at the end + end -= 1 + } + lines = append(lines, paragraph[i:end]) + i = end + 1 + } + lines = append(lines, paragraph[i:]) + return lines } - /** * Return the text as a wrapped, indented block */ func WrapText(text string, width int) string { - paragraphs := strings.Split(text, "\n") - var lines []string - for _, paragraph := range paragraphs { - lines = append(lines, WrapParagraph(paragraph, width)...) - } - return strings.Join(lines, "\n ") + paragraphs := strings.Split(text, "\n") + var lines []string + for _, paragraph := range paragraphs { + lines = append(lines, WrapParagraph(paragraph, width)...) + } + return strings.Join(lines, "\n ") } diff --git a/terminal_utils/formatting_test.go b/terminal_utils/formatting_test.go index 6d2882c..34141a1 100644 --- a/terminal_utils/formatting_test.go +++ b/terminal_utils/formatting_test.go @@ -1,79 +1,77 @@ package terminal_utils_test import ( - "testing" - "reflect" + "testing" - "offline_twitter/terminal_utils" + "reflect" + + "offline_twitter/terminal_utils" ) - func TestWrapParagraph(t *testing.T) { - test_cases := []struct{ - Text string - Expected []string - } { - { - "These are public health officials who are making decisions about your lifestyle because they know more about health, " + - "fitness and well-being than you do", - []string{ - "These are public health officials who are making decisions", - "about your lifestyle because they know more about health,", - "fitness and well-being than you do", - }, - }, - { - `Things I learned in law school:`, - []string{`Things I learned in law school:`}, - }, - { - `Every student is smarter than you except the ones in your group project.`, - []string{ - `Every student is smarter than you except the ones in your`, - `group project.`, - }, - }, - } - for _, testcase := range test_cases { - result := terminal_utils.WrapParagraph(testcase.Text, 60) - if !reflect.DeepEqual(result, testcase.Expected) { - t.Errorf("Expected:\n%s\nGot:\n%s\n", testcase.Expected, result) - } - } + test_cases := []struct { + Text string + Expected []string + }{ + { + "These are public health officials who are making decisions about your lifestyle because they know more about health, " + + "fitness and well-being than you do", + []string{ + "These are public health officials who are making decisions", + "about your lifestyle because they know more about health,", + "fitness and well-being than you do", + }, + }, + { + `Things I learned in law school:`, + []string{`Things I learned in law school:`}, + }, + { + `Every student is smarter than you except the ones in your group project.`, + []string{ + `Every student is smarter than you except the ones in your`, + `group project.`, + }, + }, + } + for _, testcase := range test_cases { + result := terminal_utils.WrapParagraph(testcase.Text, 60) + if !reflect.DeepEqual(result, testcase.Expected) { + t.Errorf("Expected:\n%s\nGot:\n%s\n", testcase.Expected, result) + } + } } - - func TestWrapText(t *testing.T) { - test_cases := []struct{ - Text string - Expected string - } { - { - "These are public health officials who are making decisions about your lifestyle because they know more about health, " + - "fitness and well-being than you do", -`These are public health officials who are making decisions + test_cases := []struct { + Text string + Expected string + }{ + { + "These are public health officials who are making decisions about your lifestyle because they know more about health, " + + "fitness and well-being than you do", + `These are public health officials who are making decisions about your lifestyle because they know more about health, fitness and well-being than you do`, - }, - { -`Things I learned in law school: + }, + { + `Things I learned in law school: Falling behind early gives you more time to catch up. Never use a long word when a diminutive one will suffice. Every student is smarter than you except the ones in your group project. If you try & fail, doesn’t matter. Try again & fail better`, -`Things I learned in law school: + `Things I learned in law school: Falling behind early gives you more time to catch up. Never use a long word when a diminutive one will suffice. Every student is smarter than you except the ones in your group project. If you try & fail, doesn’t matter. Try again & fail better`, - }, - } - for _, testcase := range test_cases { - result := terminal_utils.WrapText(testcase.Text, 60) - if result != testcase.Expected { - t.Errorf("Expected:\n%s\nGot:\n%s\n", testcase.Expected, result) - } - } + }, + } + for _, testcase := range test_cases { + result := terminal_utils.WrapText(testcase.Text, 60) + if result != testcase.Expected { + t.Errorf("Expected:\n%s\nGot:\n%s\n", testcase.Expected, result) + } + } }