REFACTOR: in persistence package, import scraper as '.'

Alessio 2025-02-03 16:25:34 -08:00
parent 95a52906ba
commit ac763a97a6
20 changed files with 185 additions and 176 deletions
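
The whole commit is one mechanical change: switching to Go's dot-import form. A dot import merges the imported package's exported identifiers into the importing file's scope, so every `scraper.X` reference below can drop its prefix. A minimal sketch of the before/after against the repo's real scraper package (the helper function is hypothetical):

    package persistence

    // Before, with a qualified import, every exported name needs the prefix:
    //
    //     import "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
    //     func (p Profile) SaveBookmark(l scraper.Bookmark) error { ... }

    // After, the dot import puts the scraper package's exported names directly in scope:
    import . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"

    // Bookmark here resolves to scraper.Bookmark.
    func save_bookmark_sketch(b Bookmark) Bookmark { return b }

The tradeoff is implicitness: a reader can no longer tell at a glance whether a name like `Bookmark` is local to the persistence package or comes from scraper.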

View File

@@ -1,5 +1,5 @@
 #!/bin/bash
 sudo mount -t tmpfs -o size=100M tmpfs pkg/persistence/test_profiles
-sudo mount -t tmpfs -o size=100M tmpfs cmd/data
+sudo mount -t tmpfs -o size=500M tmpfs cmd/data
 sudo mount -t tmpfs -o size=1000M tmpfs sample_data/profile

View File

@@ -3,10 +3,10 @@ package persistence
 import (
     "fmt"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
-func (p Profile) SaveBookmark(l scraper.Bookmark) error {
+func (p Profile) SaveBookmark(l Bookmark) error {
     _, err := p.DB.NamedExec(`
         insert into bookmarks (sort_order, user_id, tweet_id)
         values (:sort_order, :user_id, :tweet_id)
@@ -20,7 +20,7 @@ func (p Profile) SaveBookmark(l scraper.Bookmark) error {
     return nil
 }
 
-func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
+func (p Profile) DeleteBookmark(l Bookmark) error {
     _, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, l)
     if err != nil {
         return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", l, err)
@@ -28,8 +28,8 @@ func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
     return nil
 }
 
-func (p Profile) GetBookmarkBySortID(id scraper.BookmarkSortID) (scraper.Bookmark, error) {
-    var l scraper.Bookmark
+func (p Profile) GetBookmarkBySortID(id BookmarkSortID) (Bookmark, error) {
+    var l Bookmark
     err := p.DB.Get(&l, `
         select sort_order, user_id, tweet_id
         from bookmarks
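
These queries lean on sqlx named parameters: NamedExec fills each `:name` placeholder from the struct field tagged `db:"name"`. A minimal, self-contained sketch of that binding, with a hypothetical stand-in for the real Bookmark type:

    package main

    import (
        "github.com/jmoiron/sqlx"
        _ "github.com/mattn/go-sqlite3"
    )

    // Hypothetical stand-in for scraper.Bookmark; the db tags are what NamedExec
    // uses to fill the :sort_order, :user_id, :tweet_id placeholders.
    type bookmark struct {
        SortOrder int   `db:"sort_order"`
        UserID    int64 `db:"user_id"`
        TweetID   int64 `db:"tweet_id"`
    }

    func main() {
        db := sqlx.MustOpen("sqlite3", ":memory:")
        db.MustExec(`create table bookmarks (sort_order integer, user_id integer, tweet_id integer)`)

        // Each :name placeholder is bound from the matching db-tagged field.
        _, err := db.NamedExec(
            `insert into bookmarks (sort_order, user_id, tweet_id) values (:sort_order, :user_id, :tweet_id)`,
            bookmark{SortOrder: 1, UserID: 100, TweetID: 200},
        )
        if err != nil {
            panic(err)
        }
    }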

View File

@@ -7,7 +7,7 @@ import (
     "strings"
     "time"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 type SortOrder int
@@ -91,7 +91,7 @@ func (o SortOrder) NextCursorValue(r CursorResult) int {
         panic(fmt.Sprintf("Invalid sort order: %d", o))
     }
 }
 
-func (o SortOrder) NextDMCursorValue(m scraper.DMMessage) int64 {
+func (o SortOrder) NextDMCursorValue(m DMMessage) int64 {
     switch o {
     case SORT_ORDER_NEWEST, SORT_ORDER_OLDEST:
         return m.SentAt.UnixMilli()
@@ -131,12 +131,12 @@ const (
 )
 
 type CursorResult struct {
-    scraper.Tweet
-    scraper.Retweet
+    Tweet
+    Retweet
     Chrono            int `db:"chrono"`
     LikeSortOrder     int `db:"likes_sort_order"`
     BookmarkSortOrder int `db:"bookmarks_sort_order"`
-    ByUserID          scraper.UserID `db:"by_user_id"`
+    ByUserID          UserID `db:"by_user_id"`
 }
 
 type Cursor struct {
@@ -147,16 +147,16 @@ type Cursor struct {
     // Search params
     Keywords               []string
-    FromUserHandle         scraper.UserHandle   // Tweeted by this user
-    RetweetedByUserHandle  scraper.UserHandle   // Retweeted by this user
-    ByUserHandle           scraper.UserHandle   // Either tweeted or retweeted by this user
-    ToUserHandles          []scraper.UserHandle // In reply to these users
-    LikedByUserHandle      scraper.UserHandle   // Liked by this user
-    BookmarkedByUserHandle scraper.UserHandle   // Bookmarked by this user
-    ListID                 scraper.ListID       // Either tweeted or retweeted by users from this List
-    FollowedByUserHandle   scraper.UserHandle   // Either tweeted or retweeted by users followed by this user
-    SinceTimestamp         scraper.Timestamp
-    UntilTimestamp         scraper.Timestamp
+    FromUserHandle         UserHandle   // Tweeted by this user
+    RetweetedByUserHandle  UserHandle   // Retweeted by this user
+    ByUserHandle           UserHandle   // Either tweeted or retweeted by this user
+    ToUserHandles          []UserHandle // In reply to these users
+    LikedByUserHandle      UserHandle   // Liked by this user
+    BookmarkedByUserHandle UserHandle   // Bookmarked by this user
+    ListID                 ListID       // Either tweeted or retweeted by users from this List
+    FollowedByUserHandle   UserHandle   // Either tweeted or retweeted by users followed by this user
+    SinceTimestamp         Timestamp
+    UntilTimestamp         Timestamp
     TombstoneType          string
     FilterLinks            Filter
     FilterImages           Filter
@@ -173,9 +173,9 @@ type Cursor struct {
 func NewCursor() Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
@@ -189,9 +189,9 @@ func NewCursor() Cursor {
 func NewTimelineCursor() Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
@@ -202,13 +202,13 @@ func NewTimelineCursor() Cursor {
 }
 
 // Generate a cursor appropriate for showing a List feed
-func NewListCursor(list_id scraper.ListID) Cursor {
+func NewListCursor(list_id ListID) Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
+        ToUserHandles:  []UserHandle{},
         ListID:         list_id,
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
@@ -217,12 +217,12 @@ func NewListCursor(list_id scraper.ListID) Cursor {
 }
 
 // Generate a cursor appropriate for fetching a User Feed
-func NewUserFeedCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
@@ -233,12 +233,12 @@ func NewUserFeedCursor(h scraper.UserHandle) Cursor {
 }
 
 // Generate a cursor appropriate for a user's Media tab
-func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedMediaCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_NEWEST,
@@ -250,12 +250,12 @@ func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
 }
 
 // Generate a cursor for a User's Likes
-func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedLikesCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_LIKED_AT,
@@ -266,12 +266,12 @@ func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
 }
 
 // Generate a cursor for a User's Bookmarks
-func NewUserFeedBookmarksCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedBookmarksCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords:       []string{},
-        ToUserHandles:  []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles:  []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue:    0,
         SortOrder:      SORT_ORDER_BOOKMARKED_AT,
@@ -343,24 +343,24 @@ func (c *Cursor) apply_token(token string) error {
     var err error
     switch parts[0] {
     case "from":
-        c.FromUserHandle = scraper.UserHandle(parts[1])
+        c.FromUserHandle = UserHandle(parts[1])
     case "to":
-        c.ToUserHandles = append(c.ToUserHandles, scraper.UserHandle(parts[1]))
+        c.ToUserHandles = append(c.ToUserHandles, UserHandle(parts[1]))
     case "retweeted_by":
-        c.RetweetedByUserHandle = scraper.UserHandle(parts[1])
+        c.RetweetedByUserHandle = UserHandle(parts[1])
         c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
     case "liked_by":
-        c.LikedByUserHandle = scraper.UserHandle(parts[1])
+        c.LikedByUserHandle = UserHandle(parts[1])
     case "bookmarked_by":
-        c.BookmarkedByUserHandle = scraper.UserHandle(parts[1])
+        c.BookmarkedByUserHandle = UserHandle(parts[1])
     case "followed_by":
-        c.FollowedByUserHandle = scraper.UserHandle(parts[1])
+        c.FollowedByUserHandle = UserHandle(parts[1])
     case "list":
         i, err := strconv.Atoi(parts[1])
         if err != nil {
             return fmt.Errorf("%w: filter 'list:' must be a number (list ID), got %q", ErrInvalidQuery, parts[1])
         }
-        c.ListID = scraper.ListID(i)
+        c.ListID = ListID(i)
     case "since":
         c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
     case "until":
@@ -413,7 +413,7 @@ func (c *Cursor) apply_token(token string) error {
     return nil
 }
 
-func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error) {
+func (p Profile) NextPage(c Cursor, current_user_id UserID) (Feed, error) {
     where_clauses := []string{}
     bind_values := []interface{}{}
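
CursorResult embeds Tweet and Retweet instead of naming them as fields; sqlx flattens embedded structs, so one joined row can scan into both at once, plus the extra sort columns. A tiny self-contained sketch of the mechanism with hypothetical types:

    package main

    import (
        "fmt"

        "github.com/jmoiron/sqlx"
        _ "github.com/mattn/go-sqlite3"
    )

    // Hypothetical miniature of the CursorResult pattern: Inner plays the role
    // of the embedded Tweet/Retweet structs.
    type Inner struct {
        ID   int64  `db:"id"`
        Text string `db:"text"`
    }

    type Result struct {
        Inner      // embedded: its db-tagged fields are scanned as if top-level
        Chrono int `db:"chrono"`
    }

    func main() {
        db := sqlx.MustOpen("sqlite3", ":memory:")

        // One row fills both the embedded struct's columns and the outer field.
        var r Result
        if err := db.Get(&r, `select 1 as id, 'hi' as text, 42 as chrono`); err != nil {
            panic(err)
        }
        fmt.Println(r.ID, r.Text, r.Chrono) // 1 hi 42
    }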

View File

@@ -3,10 +3,10 @@ package persistence
 import (
     "fmt"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
-func (p Profile) SaveLike(l scraper.Like) error {
+func (p Profile) SaveLike(l Like) error {
     _, err := p.DB.NamedExec(`
         insert into likes (sort_order, user_id, tweet_id)
         values (:sort_order, :user_id, :tweet_id)
@@ -20,7 +20,7 @@ func (p Profile) SaveLike(l scraper.Like) error {
     return nil
 }
 
-func (p Profile) DeleteLike(l scraper.Like) error {
+func (p Profile) DeleteLike(l Like) error {
     _, err := p.DB.NamedExec(`delete from likes where user_id = :user_id and tweet_id = :tweet_id`, l)
     if err != nil {
         return fmt.Errorf("Error executing DeleteLike(%#v):\n %w", l, err)
@@ -28,8 +28,8 @@ func (p Profile) DeleteLike(l scraper.Like) error {
     return nil
 }
 
-func (p Profile) GetLikeBySortID(id scraper.LikeSortID) (scraper.Like, error) {
-    var l scraper.Like
+func (p Profile) GetLikeBySortID(id LikeSortID) (Like, error) {
+    var l Like
     err := p.DB.Get(&l, `
         select sort_order, user_id, tweet_id
         from likes

View File

@@ -6,7 +6,7 @@ import (
     "os"
     "path"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 type MediaDownloader interface {
@@ -14,7 +14,7 @@ type MediaDownloader interface {
 }
 
 type DefaultDownloader struct {
-    *scraper.API
+    *API
 }
 
 // Download a file over HTTP and save it.
@@ -47,7 +47,7 @@ func (d DefaultDownloader) Curl(url string, outpath string) error {
 
 // Downloads an Image, and if successful, marks it as downloaded in the DB
 // DUPE: download-image
-func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownloader) error {
+func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error {
     outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
     err := downloader.Curl(img.RemoteURL, outfile)
     if err != nil {
@@ -59,12 +59,12 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
 
 // Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
 // DUPE: download-video
-func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
+func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error {
     // Download the video
     outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
     err := downloader.Curl(v.RemoteURL, outfile)
-    if errors.Is(err, scraper.ErrorDMCA) {
+    if errors.Is(err, ErrorDMCA) {
         v.IsDownloaded = false
         v.IsBlockedByDMCA = true
     } else if err != nil {
@@ -86,7 +86,7 @@ func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownload
 
 // Downloads an URL thumbnail image, and if successful, marks it as downloaded in the DB
 // DUPE: download-link-thumbnail
-func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownloader) error {
+func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error {
     if url.HasCard && url.HasThumbnail {
         outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
         err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
@@ -100,12 +100,12 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl
 
 // Download a tweet's video and picture content.
 // Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
-func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error {
+func (p Profile) DownloadTweetContentFor(t *Tweet, api *API) error {
     return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
 }
 
 // Enable injecting a custom MediaDownloader (i.e., for testing)
-func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader MediaDownloader) error {
+func (p Profile) DownloadTweetContentWithInjector(t *Tweet, downloader MediaDownloader) error {
     // Check if content needs to be downloaded; if not, just return
     if !p.CheckTweetContentDownloadNeeded(*t) {
         return nil
@@ -141,12 +141,12 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
 }
 
 // Download a user's banner and profile images
-func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error {
+func (p Profile) DownloadUserContentFor(u *User, api *API) error {
     return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
 }
 
 // Enable injecting a custom MediaDownloader (i.e., for testing)
-func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
+func (p Profile) DownloadUserContentWithInjector(u *User, downloader MediaDownloader) error {
     if !p.CheckUserContentDownloadNeeded(*u) {
         return nil
     }
@@ -155,7 +155,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
 
     var target_url string
     if u.ProfileImageUrl == "" {
-        target_url = scraper.DEFAULT_PROFILE_IMAGE_URL
+        target_url = DEFAULT_PROFILE_IMAGE_URL
     } else {
         target_url = u.ProfileImageUrl
     }
@@ -170,7 +170,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
 
     outfile = p.get_banner_image_output_path(*u)
     err = downloader.Curl(u.BannerImageUrl, outfile)
-    if errors.Is(err, scraper.ErrMediaDownload404) {
+    if errors.Is(err, ErrMediaDownload404) {
         // Try adding "600x200". Not sure why this does this but sometimes it does.
         err = downloader.Curl(u.BannerImageUrl+"/600x200", outfile)
     }
@@ -186,7 +186,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
 // Download a User's tiny profile image, if it hasn't been downloaded yet.
 // If it has been downloaded, do nothing.
 // If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
-func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error {
+func (p Profile) DownloadUserProfileImageTiny(u *User, api *API) error {
     if p.IsFollowing(*u) {
         return p.DownloadUserContentFor(u, api)
     }
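
The `...WithInjector` pairs above are a small dependency-injection seam: production callers pass DefaultDownloader, while tests can pass any value satisfying MediaDownloader. A minimal sketch of a test double, assuming the interface consists of just the Curl method seen above:

    package main

    import "fmt"

    // Assumed shape of the interface, judging from the Curl method above.
    type MediaDownloader interface {
        Curl(url string, outpath string) error
    }

    // A test double: records what would have been fetched instead of doing HTTP.
    type recordingDownloader struct {
        calls *[]string
    }

    func (d recordingDownloader) Curl(url string, outpath string) error {
        *d.calls = append(*d.calls, url+" -> "+outpath)
        return nil
    }

    func main() {
        calls := []string{}
        var dl MediaDownloader = recordingDownloader{calls: &calls}
        _ = dl.Curl("https://example.com/img.jpg", "images/img.jpg")
        fmt.Println(calls)
    }

The FakeDownloader in the test file below plays exactly this role.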

View File

@@ -6,7 +6,7 @@ import (
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Some types to spy on a MediaDownloader
@@ -38,7 +38,7 @@ func (d FakeDownloader) Contains(result SpyResult) bool {
     return false
 }
 
-func test_all_downloaded(tweet scraper.Tweet, yes_or_no bool, t *testing.T) {
+func test_all_downloaded(tweet Tweet, yes_or_no bool, t *testing.T) {
     error_msg := map[bool]string{
         true:  "Expected to be downloaded, but it wasn't",
         false: "Expected not to be downloaded, but it was",
@@ -147,7 +147,7 @@ func TestDownloadDefaultUserContent(t *testing.T) {
     // Check that the downloader was called with the appropriate stuff
     assert.Len(*fake_downloader.Spy, 1)
     assert.True(fake_downloader.Contains(SpyResult{
-        scraper.DEFAULT_PROFILE_IMAGE_URL,
+        DEFAULT_PROFILE_IMAGE_URL,
         "test_profiles/TestMediaQueries/profile_images/default_profile.png",
     }))
 }

View File

@@ -3,14 +3,14 @@ package persistence
 import (
     "fmt"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Save an Image
 //
 // args:
 // - img: the Image to save
-func (p Profile) SaveImage(img scraper.Image) error {
+func (p Profile) SaveImage(img Image) error {
     _, err := p.DB.NamedExec(`
         insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
         values (:id, :tweet_id, :width, :height, :remote_url, :local_filename, :is_downloaded)
@@ -29,7 +29,7 @@ func (p Profile) SaveImage(img scraper.Image) error {
 //
 // args:
 // - img: the Video to save
-func (p Profile) SaveVideo(vid scraper.Video) error {
+func (p Profile) SaveVideo(vid Video) error {
     _, err := p.DB.NamedExec(`
         insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename,
                             duration, view_count, is_downloaded, is_blocked_by_dmca, is_gif)
@@ -49,7 +49,7 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
 }
 
 // Save an Url
-func (p Profile) SaveUrl(url scraper.Url) error {
+func (p Profile) SaveUrl(url Url) error {
     _, err := p.DB.NamedExec(`
         insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
                           thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
@@ -68,7 +68,7 @@ func (p Profile) SaveUrl(url scraper.Url) error {
 }
 
 // Save a Poll
-func (p Profile) SavePoll(poll scraper.Poll) error {
+func (p Profile) SavePoll(poll Poll) error {
     _, err := p.DB.NamedExec(`
         insert into polls (id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4,
                            choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
@@ -90,7 +90,7 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
 }
 
 // Get the list of images for a tweet
-func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
+func (p Profile) GetImagesForTweet(t Tweet) (imgs []Image, err error) {
     err = p.DB.Select(&imgs,
         "select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?",
         t.ID)
@@ -98,7 +98,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
 }
 
 // Get the list of videos for a tweet
-func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
+func (p Profile) GetVideosForTweet(t Tweet) (vids []Video, err error) {
     err = p.DB.Select(&vids, `
         select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration,
                view_count, is_downloaded, is_blocked_by_dmca, is_gif
@@ -109,7 +109,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
 }
 
 // Get the list of Urls for a Tweet
-func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
+func (p Profile) GetUrlsForTweet(t Tweet) (urls []Url, err error) {
     err = p.DB.Select(&urls, `
         select tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
                thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded
@@ -121,7 +121,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
 }
 
 // Get the list of Polls for a Tweet
-func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
+func (p Profile) GetPollsForTweet(t Tweet) (polls []Poll, err error) {
     err = p.DB.Select(&polls, `
         select id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes,
                voting_duration, voting_ends_at, last_scraped_at

View File

@@ -8,7 +8,7 @@ import (
     "github.com/go-test/deep"
     "github.com/stretchr/testify/require"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Create an Image, save it, reload it, and make sure it comes back the same
@@ -31,7 +31,7 @@ func TestSaveAndLoadImage(t *testing.T) {
     imgs, err := profile.GetImagesForTweet(tweet)
     require.NoError(err)
 
-    var new_img scraper.Image
+    var new_img Image
     for index := range imgs {
         if imgs[index].ID == img.ID {
             new_img = imgs[index]
@@ -52,7 +52,7 @@ func TestModifyImage(t *testing.T) {
     tweet := create_stable_tweet()
     img := tweet.Images[0]
-    require.Equal(scraper.ImageID(-1), img.ID, "Got the wrong image back")
+    require.Equal(ImageID(-1), img.ID, "Got the wrong image back")
 
     img.IsDownloaded = true
@@ -94,7 +94,7 @@ func TestSaveAndLoadVideo(t *testing.T) {
     vids, err := profile.GetVideosForTweet(tweet)
     require.NoError(err)
 
-    var new_vid scraper.Video
+    var new_vid Video
     for index := range vids {
         if vids[index].ID == vid.ID {
             new_vid = vids[index]
@@ -115,7 +115,7 @@ func TestModifyVideo(t *testing.T) {
     tweet := create_stable_tweet()
     vid := tweet.Videos[0]
-    require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back")
+    require.Equal(VideoID(-1), vid.ID, "Got the wrong video back")
 
     vid.IsDownloaded = true
     vid.IsBlockedByDMCA = true
@@ -157,7 +157,7 @@ func TestSaveAndLoadUrl(t *testing.T) {
     urls, err := profile.GetUrlsForTweet(tweet)
     require.NoError(err)
 
-    var new_url scraper.Url
+    var new_url Url
     for index := range urls {
         if urls[index].Text == url.Text {
             new_url = urls[index]
@@ -218,7 +218,7 @@ func TestSaveAndLoadPoll(t *testing.T) {
     polls, err := profile.GetPollsForTweet(tweet)
     require.NoError(err)
 
-    var new_poll scraper.Poll
+    var new_poll Poll
     for index := range polls {
         if polls[index].ID == poll.ID {
             new_poll = polls[index]

View File

@@ -4,6 +4,7 @@ import (
     "database/sql"
     "errors"
     "fmt"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

View File

@@ -9,7 +9,7 @@ import (
     sql "github.com/jmoiron/sqlx"
     _ "github.com/mattn/go-sqlite3"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 //go:embed schema.sql
@@ -125,14 +125,14 @@ func LoadProfile(profile_dir string) (Profile, error) {
     return ret, err
 }
 
-func (p Profile) ListSessions() []scraper.UserHandle {
+func (p Profile) ListSessions() []UserHandle {
     result, err := filepath.Glob(filepath.Join(p.ProfileDir, "*.session"))
     if err != nil {
         panic(err)
     }
-    ret := []scraper.UserHandle{}
+    ret := []UserHandle{}
     for _, filename := range result {
-        ret = append(ret, scraper.UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
+        ret = append(ret, UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
     }
     return ret
 }
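
ListSessions recovers each handle by globbing `*.session` and slicing the extension off the filename. An equivalent standalone sketch using strings.TrimSuffix (file names hypothetical):

    package main

    import (
        "fmt"
        "path/filepath"
        "strings"
    )

    func main() {
        // Hypothetical results, as filepath.Glob(filepath.Join(dir, "*.session")) might return them.
        files := []string{"/profile/alice.session", "/profile/bob.session"}

        handles := []string{}
        for _, filename := range files {
            // Base strips the directory; TrimSuffix strips the ".session" extension.
            handles = append(handles, strings.TrimSuffix(filepath.Base(filename), ".session"))
        }
        fmt.Println(handles) // [alice bob]
    }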

View File

@@ -3,11 +3,11 @@ package persistence
 import (
     "fmt"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Save a Retweet. Do nothing if it already exists, because none of its parameters are modifiable.
-func (p Profile) SaveRetweet(r scraper.Retweet) error {
+func (p Profile) SaveRetweet(r Retweet) error {
     _, err := p.DB.NamedExec(`
         insert into retweets (retweet_id, tweet_id, retweeted_by, retweeted_at)
         values (:retweet_id, :tweet_id, :retweeted_by, :retweeted_at)
@@ -22,8 +22,8 @@ func (p Profile) SaveRetweet(r scraper.Retweet) error {
 }
 
 // Retrieve a Retweet by ID
-func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
-    var r scraper.Retweet
+func (p Profile) GetRetweetById(id TweetID) (Retweet, error) {
+    var r Retweet
     err := p.DB.Get(&r, `
         select retweet_id, tweet_id, retweeted_by, retweeted_at
         from retweets

View File

@@ -2,13 +2,15 @@ package persistence
 
 import (
     "encoding/json"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
     "os"
 
     log "github.com/sirupsen/logrus"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
-func (p Profile) SaveSession(api scraper.API) {
+func (p Profile) SaveSession(api API) {
     data, err := json.Marshal(api)
     if err != nil {
         panic(err)
@@ -21,13 +23,13 @@ func (p Profile) SaveSession(api scraper.API) {
     }
 }
 
-func (p Profile) LoadSession(userhandle scraper.UserHandle) scraper.API {
+func (p Profile) LoadSession(userhandle UserHandle) API {
     data, err := os.ReadFile(p.ProfileDir + "/" + string(userhandle+".session"))
     if err != nil {
         panic(err)
     }
 
-    var result scraper.API
+    var result API
     err = json.Unmarshal(data, &result)
     if err != nil {
         panic(err)
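
SaveSession and LoadSession persist the whole API struct as JSON next to the profile. A minimal round-trip sketch of the same idea (the struct fields here are hypothetical stand-ins):

    package main

    import (
        "encoding/json"
        "fmt"
        "os"
    )

    // Hypothetical stand-in for scraper.API's serializable fields.
    type session struct {
        UserHandle      string
        IsAuthenticated bool
    }

    func main() {
        path := os.TempDir() + "/testUser.session"

        // Save: marshal to JSON and write to a .session file.
        data, err := json.Marshal(session{UserHandle: "testUser", IsAuthenticated: true})
        if err != nil {
            panic(err)
        }
        if err := os.WriteFile(path, data, 0644); err != nil {
            panic(err)
        }

        // Load: read the file back and unmarshal into a fresh struct.
        var loaded session
        raw, err := os.ReadFile(path)
        if err != nil {
            panic(err)
        }
        if err := json.Unmarshal(raw, &loaded); err != nil {
            panic(err)
        }
        fmt.Printf("%+v\n", loaded)
    }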

View File

@@ -2,7 +2,7 @@ package persistence_test
 
 import (
     "fmt"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
     "math/rand"
     "net/http"
     "net/http/cookiejar"
@@ -10,6 +10,8 @@ import (
     "time"
 
     "github.com/go-test/deep"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Save and load an API session; it should come back the same
@@ -22,7 +24,7 @@ func TestSaveAndLoadAuthenticatedSession(t *testing.T) {
         panic(err)
     }
 
-    api := scraper.API{
+    api := API{
         UserHandle:      "testUser",
         IsAuthenticated: true,
         Client: http.Client{

View File

@@ -4,16 +4,17 @@ import (
     "database/sql"
     "errors"
     "fmt"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 type SpaceParticipant struct {
-    UserID  scraper.UserID  `db:"user_id"`
-    SpaceID scraper.SpaceID `db:"space_id"`
+    UserID  UserID  `db:"user_id"`
+    SpaceID SpaceID `db:"space_id"`
 }
 
 // Save a Space
-func (p Profile) SaveSpace(s scraper.Space) error {
+func (p Profile) SaveSpace(s Space) error {
     _, err := p.DB.NamedExec(`
         insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
                             is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
@@ -52,7 +53,7 @@ func (p Profile) SaveSpace(s scraper.Space) error {
 }
 
 // Get a Space by ID
-func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
+func (p Profile) GetSpaceById(id SpaceID) (space Space, err error) {
     err = p.DB.Get(&space,
         `select id, ifnull(created_by_id, 0) created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
                 is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched
@@ -61,7 +62,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err erro
     if err != nil {
         return
     }
-    space.ParticipantIds = []scraper.UserID{}
+    space.ParticipantIds = []UserID{}
     rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
     if errors.Is(err, sql.ErrNoRows) {
         err = nil
@@ -70,7 +71,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err erro
     if err != nil {
         panic(err)
     }
-    var participant_id scraper.UserID
+    var participant_id UserID
     for rows.Next() {
         err = rows.Scan(&participant_id)
         if err != nil {

View File

@@ -5,10 +5,11 @@ import (
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 
     "github.com/go-test/deep"
     "math/rand"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Create a Space, save it, reload it, and make sure it comes back the same
@@ -41,8 +42,8 @@ func TestModifySpace(t *testing.T) {
 
     // Modify and save
     space.State = "Some other state"
-    space.UpdatedAt = scraper.TimestampFromUnix(9001)
-    space.EndedAt = scraper.TimestampFromUnix(10001)
+    space.UpdatedAt = TimestampFromUnix(9001)
+    space.EndedAt = TimestampFromUnix(10001)
     space.ReplayWatchCount = 100
     space.LiveListenersCount = 50
     space.IsDetailsFetched = true
@@ -51,8 +52,8 @@ func TestModifySpace(t *testing.T) {
     new_space, err := profile.GetSpaceById(space.ID)
     require.NoError(err)
 
-    assert.Equal(scraper.TimestampFromUnix(9001), new_space.UpdatedAt)
-    assert.Equal(scraper.TimestampFromUnix(10001), new_space.EndedAt)
+    assert.Equal(TimestampFromUnix(9001), new_space.UpdatedAt)
+    assert.Equal(TimestampFromUnix(10001), new_space.EndedAt)
     assert.Equal(100, new_space.ReplayWatchCount)
     assert.Equal(50, new_space.LiveListenersCount)
     assert.True(new_space.IsDetailsFetched)
@@ -68,9 +69,9 @@ func TestNoWorseningSpace(t *testing.T) {
     space.ShortUrl = "Some Short Url"
     space.State = "Some State"
     space.Title = "Debating Somebody"
-    space.CreatedAt = scraper.TimestampFromUnix(1000)
-    space.UpdatedAt = scraper.TimestampFromUnix(2000)
-    space.CreatedById = scraper.UserID(-1)
+    space.CreatedAt = TimestampFromUnix(1000)
+    space.UpdatedAt = TimestampFromUnix(2000)
+    space.CreatedById = UserID(-1)
     space.LiveListenersCount = 100
     space.IsDetailsFetched = true
@@ -82,9 +83,9 @@ func TestNoWorseningSpace(t *testing.T) {
     space.ShortUrl = ""
     space.Title = ""
     space.State = ""
-    space.CreatedAt = scraper.TimestampFromUnix(0)
-    space.UpdatedAt = scraper.TimestampFromUnix(0)
-    space.CreatedById = scraper.UserID(0)
+    space.CreatedAt = TimestampFromUnix(0)
+    space.UpdatedAt = TimestampFromUnix(0)
+    space.CreatedById = UserID(0)
     space.LiveListenersCount = 0
     space.IsDetailsFetched = false
     err = profile.SaveSpace(space)
@@ -97,9 +98,9 @@ func TestNoWorseningSpace(t *testing.T) {
     assert.Equal(new_space.ShortUrl, "Some Short Url")
     assert.Equal(new_space.State, "Some State")
     assert.Equal(new_space.Title, "Debating Somebody")
-    assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000))
-    assert.Equal(new_space.UpdatedAt, scraper.TimestampFromUnix(2000))
-    assert.Equal(new_space.CreatedById, scraper.UserID(-1))
+    assert.Equal(new_space.CreatedAt, TimestampFromUnix(1000))
+    assert.Equal(new_space.UpdatedAt, TimestampFromUnix(2000))
+    assert.Equal(new_space.CreatedById, UserID(-1))
     assert.Equal(new_space.LiveListenersCount, 100)
     assert.True(new_space.IsDetailsFetched)
 }

View File

@@ -9,7 +9,7 @@ import (
     "github.com/go-test/deep"
 
     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Create a Tweet, save it, reload it, and make sure it comes back the same
@@ -71,7 +71,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsStub = false
     tweet.IsConversationScraped = true
     tweet.IsExpandable = true
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
+    tweet.LastScrapedAt = TimestampFromUnix(1000)
     tweet.Text = "Yes text"
     tweet.NumLikes = 10
     tweet.NumRetweets = 11
@@ -87,7 +87,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsStub = true
     tweet.IsConversationScraped = false
     tweet.IsExpandable = false
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
+    tweet.LastScrapedAt = TimestampFromUnix(500)
     tweet.Text = ""
     err = profile.SaveTweet(tweet)
     require.NoError(err)
@@ -231,7 +231,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = true
     tweet.IsContentDownloaded = false
     tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
+    tweet.LastScrapedAt = TimestampFromUnix(1000)
 
     err := profile.SaveTweet(tweet)
     require.NoError(err)
@@ -243,7 +243,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = false
     tweet.IsContentDownloaded = true
     tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
+    tweet.LastScrapedAt = TimestampFromUnix(2000)
     tweet.TombstoneType = "deleted"
 
     err = profile.SaveTweet(tweet)
@@ -332,7 +332,7 @@ func TestLoadMissingTweet(t *testing.T) {
     profile_path := "test_profiles/TestTweetQueries"
     profile := create_or_load_profile(profile_path)
 
-    _, err := profile.GetTweetById(scraper.TweetID(6234234)) // Random number
+    _, err := profile.GetTweetById(TweetID(6234234)) // Random number
     require.Error(t, err)
     assert.ErrorIs(t, err, persistence.ErrNotInDatabase)
 }

View File

@@ -5,6 +5,8 @@ import (
     "fmt"
     "path"
 
+    log "github.com/sirupsen/logrus"
+
     . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
@@ -18,7 +20,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
     // who were marked as deleted, and then let the callee re-scrape and re-save them.
     var conflict_err ErrConflictingUserHandle
     if errors.As(err, &conflict_err) {
-        fmt.Printf(
+        log.Warnf(
             "Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
             conflict_err.ConflictingUserID,
         )
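
The conflict branch works because errors.As unwraps the chain produced by `%w` wrapping until it finds a value of the target type, so SaveTweetTrove can pull the conflicting ID back out of a wrapped error. A small self-contained sketch with a hypothetical mirror of ErrConflictingUserHandle:

    package main

    import (
        "errors"
        "fmt"
    )

    // Hypothetical mirror of persistence.ErrConflictingUserHandle.
    type errConflictingHandle struct {
        ConflictingUserID int64
    }

    func (e errConflictingHandle) Error() string {
        return fmt.Sprintf("user ID %d already holds this handle", e.ConflictingUserID)
    }

    func save_user() error {
        // Wrapping with %w keeps the typed error reachable for errors.As.
        return fmt.Errorf("SaveUser: %w", errConflictingHandle{ConflictingUserID: 42})
    }

    func main() {
        err := save_user()
        var conflict_err errConflictingHandle
        if errors.As(err, &conflict_err) {
            fmt.Println("conflicting user:", conflict_err.ConflictingUserID)
        }
    }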

View File

@@ -9,11 +9,11 @@ import (
     "github.com/jmoiron/sqlx"
     "github.com/mattn/go-sqlite3"
 
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 type ErrConflictingUserHandle struct {
-    ConflictingUserID scraper.UserID
+    ConflictingUserID UserID
 }
 
 func (e ErrConflictingUserHandle) Error() string {
@@ -46,7 +46,7 @@ const USERS_ALL_SQL_FIELDS = `
 // 3. Mark the old user as deactivated, eliminating the conflict
 // 4. Re-save the new user
 // 5. Return an ErrConflictingUserHandle, notifying the caller of the conflict
-func (p Profile) SaveUser(u *scraper.User) error {
+func (p Profile) SaveUser(u *User) error {
     // First, check if the user needs a fake ID, and generate one if needed
     if u.IsNeedingFakeID {
         // User is fake; check if we already have them, in order to proceed
@@ -70,7 +70,7 @@ func (p Profile) SaveUser(u *scraper.User) error {
     // We know the UNIQUE violation must be on `handle`, because we checked for users with this ID
     // above (`update` query).
     handle_conflict := func() error {
-        var old_user scraper.User
+        var old_user User
         err := p.DB.Get(&old_user,
             `select id, is_id_fake from users where handle = ? and is_banned = 0 and is_deleted = 0`,
             u.Handle,
@@ -189,10 +189,10 @@ func (p Profile) SaveUser(u *scraper.User) error {
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error) {
+func (p Profile) GetUserByHandle(handle UserHandle) (User, error) {
     db := p.DB
 
-    var ret scraper.User
+    var ret User
     err := db.Get(&ret, `
         select `+USERS_ALL_SQL_FIELDS+`
         from users_by_handle
@@ -212,10 +212,10 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
+func (p Profile) GetUserByID(id UserID) (User, error) {
     db := p.DB
 
-    var ret scraper.User
+    var ret User
     err := db.Get(&ret, `
         select `+USERS_ALL_SQL_FIELDS+`
@@ -243,7 +243,7 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
 //
 // The `user` object will always have `is_content_downloaded` = false on every scrape. This is
 // why the No Worsening Principle is needed.
-func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
+func (p Profile) CheckUserContentDownloadNeeded(user User) bool {
     row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)
 
     var is_content_downloaded bool
@@ -271,7 +271,7 @@ func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
 }
 
 // Follow / unfollow a user. Update the given User object's IsFollowed field.
-func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
+func (p Profile) SetUserFollowed(user *User, is_followed bool) {
     result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
     if err != nil {
         panic(fmt.Errorf("Error inserting user with handle %q:\n %w", user.Handle, err))
@@ -286,12 +286,12 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
     user.IsFollowed = is_followed
 }
 
-func (p Profile) NextFakeUserID() scraper.UserID {
+func (p Profile) NextFakeUserID() UserID {
     _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
     if err != nil {
         panic(err)
     }
-    var ret scraper.UserID
+    var ret UserID
     err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
     if err != nil {
         panic(err)
@@ -301,7 +301,7 @@ func (p Profile) NextFakeUserID() scraper.UserID {
 
 // TODO: This is only used in checking whether the media downloader should get the big or small version of
 // a profile image. That should be rewritten
-func (p Profile) IsFollowing(user scraper.User) bool {
+func (p Profile) IsFollowing(user User) bool {
     row := p.DB.QueryRow("select is_followed from users where id like ?", user.ID)
     var ret bool
     err := row.Scan(&ret)
@@ -315,21 +315,21 @@ func (p Profile) IsFollowing(user scraper.User) bool {
 }
 
 // Utility function to compute the path to save banner image to
-func (p Profile) get_banner_image_output_path(u scraper.User) string {
+func (p Profile) get_banner_image_output_path(u User) string {
     return path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath)
 }
 
 // Utility function to compute the path to save profile image to
-func (p Profile) get_profile_image_output_path(u scraper.User) string {
+func (p Profile) get_profile_image_output_path(u User) string {
     if u.ProfileImageUrl == "" {
-        return path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL))
+        return path.Join(p.ProfileDir, "profile_images", path.Base(DEFAULT_PROFILE_IMAGE_URL))
     }
     return path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
 }
 
 // Do a text search for users
-func (p Profile) SearchUsers(s string) []scraper.User {
-    var ret []scraper.User
+func (p Profile) SearchUsers(s string) []User {
+    var ret []User
     q, args, err := sqlx.Named(`
         select `+USERS_ALL_SQL_FIELDS+`
         from users
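
SaveUser's handle-conflict path depends on recognizing a UNIQUE violation from the driver, which is why mattn/go-sqlite3 is imported here without the blank identifier. A minimal sketch of that detection (table and data hypothetical):

    package main

    import (
        "errors"
        "fmt"

        "github.com/jmoiron/sqlx"
        sqlite3 "github.com/mattn/go-sqlite3"
    )

    func main() {
        db := sqlx.MustOpen("sqlite3", ":memory:")
        db.MustExec(`create table users (id integer primary key, handle text unique)`)
        db.MustExec(`insert into users values (1, 'somebody')`)

        // A second user with the same handle trips the UNIQUE constraint.
        _, err := db.Exec(`insert into users values (2, 'somebody')`)

        var sqlite_err sqlite3.Error
        if errors.As(err, &sqlite_err) && sqlite_err.ExtendedCode == sqlite3.ErrConstraintUnique {
            fmt.Println("unique constraint violation:", sqlite_err)
        }
    }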

View File

@@ -12,7 +12,7 @@ import (
     "github.com/stretchr/testify/require"
 
     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 // Create a user, save it, reload it, and make sure it comes back the same
@@ -53,19 +53,19 @@ func TestModifyUser(t *testing.T) {
     user := create_dummy_user()
     user.DisplayName = "Display Name 1"
     user.Location = "location1"
-    user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
     user.IsPrivate = false
     user.IsVerified = false
     user.FollowersCount = 1000
-    user.JoinDate = scraper.TimestampFromUnix(1000)
+    user.JoinDate = TimestampFromUnix(1000)
     user.ProfileImageUrl = "asdf"
     user.IsContentDownloaded = true
 
-    // Save the user so it can be modified
+    // Save the user for the first time; should do insert
     err := profile.SaveUser(&user)
     require.NoError(err)
 
-    new_handle := scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    new_handle := UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
 
     user.DisplayName = "Display Name 2"
     user.Location = "location2"
@@ -73,11 +73,11 @@ func TestModifyUser(t *testing.T) {
     user.IsPrivate = true
     user.IsVerified = true
     user.FollowersCount = 2000
-    user.JoinDate = scraper.TimestampFromUnix(2000)
+    user.JoinDate = TimestampFromUnix(2000)
     user.ProfileImageUrl = "asdf2"
     user.IsContentDownloaded = false // test No Worsening
 
-    // Save the modified user
+    // Save the user for the second time; should do update
     err = profile.SaveUser(&user)
     require.NoError(err)
@@ -107,9 +107,9 @@ func TestSetUserBannedDeleted(t *testing.T) {
     user.DisplayName = "Display Name 1"
     user.Location = "location1"
     user.Bio = "Some Bio"
-    user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
     user.FollowersCount = 1000
-    user.JoinDate = scraper.TimestampFromUnix(1000)
+    user.JoinDate = TimestampFromUnix(1000)
     user.ProfileImageUrl = "asdf"
     user.IsContentDownloaded = true
@@ -118,7 +118,7 @@ func TestSetUserBannedDeleted(t *testing.T) {
     require.NoError(err)
 
     // Now the user deactivates
-    err = profile.SaveUser(&scraper.User{ID: user.ID, IsDeleted: true})
+    err = profile.SaveUser(&User{ID: user.ID, IsDeleted: true})
     require.NoError(err)
 
     // Reload the modified user
     new_user, err := profile.GetUserByID(user.ID)
@@ -141,9 +141,9 @@ func TestSaveAndLoadBannedDeletedUser(t *testing.T) {
     profile_path := "test_profiles/TestUserQueries"
     profile := create_or_load_profile(profile_path)
 
-    user := scraper.User{
-        ID:       scraper.UserID(rand.Int31()),
-        Handle:   scraper.UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
+    user := User{
+        ID:       UserID(rand.Int31()),
+        Handle:   UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
         IsBanned: true,
     }
@@ -365,20 +365,20 @@ func TestCreateUnknownUserWithHandle(t *testing.T) {
     next_id := profile.NextFakeUserID()
 
-    handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
-    user := scraper.GetUnknownUserWithHandle(handle)
-    assert.Equal(scraper.UserID(0), user.ID)
+    handle := UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
+    user := GetUnknownUserWithHandle(handle)
+    assert.Equal(UserID(0), user.ID)
     assert.True(user.IsIdFake)
 
     err := profile.SaveUser(&user)
     assert.NoError(err)
-    assert.Equal(scraper.UserID(next_id+1), user.ID)
+    assert.Equal(UserID(next_id+1), user.ID)
 
     // Ensure the change was persisted
     user_reloaded, err := profile.GetUserByHandle(user.Handle)
     require.NoError(t, err)
     assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
-    assert.Equal(scraper.UserID(next_id+1), user_reloaded.ID)
+    assert.Equal(UserID(next_id+1), user_reloaded.ID)
 
     // Why not tack this test on here: make sure NextFakeUserID works as expected
     assert.Equal(next_id+2, profile.NextFakeUserID())
@@ -393,8 +393,8 @@ func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {
     user := create_stable_user()
 
-    unknown_user := scraper.GetUnknownUserWithHandle(user.Handle)
-    assert.Equal(scraper.UserID(0), unknown_user.ID)
+    unknown_user := GetUnknownUserWithHandle(user.Handle)
+    assert.Equal(UserID(0), unknown_user.ID)
 
     err := profile.SaveUser(&unknown_user)
     assert.NoError(err)
@@ -417,6 +417,6 @@ func TestSearchUsers(t *testing.T) {
     users := profile.SearchUsers("no")
     assert.Len(users, 2)
-    assert.Equal(users[0].Handle, scraper.UserHandle("Cernovich"))
-    assert.Equal(users[1].Handle, scraper.UserHandle("CovfefeAnon"))
+    assert.Equal(users[0].Handle, UserHandle("Cernovich"))
+    assert.Equal(users[1].Handle, UserHandle("CovfefeAnon"))
 }

View File

@@ -8,7 +8,7 @@ import (
     "github.com/stretchr/testify/require"
 
     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )
 
 func TestVersionUpgrade(t *testing.T) {
@@ -21,7 +21,7 @@ func TestVersionUpgrade(t *testing.T) {
     profile := create_or_load_profile(profile_path)
 
     test_migration := "insert into tweets (id, user_id, text) values (21250554358298342, -1, 'awefjk')"
-    test_tweet_id := scraper.TweetID(21250554358298342)
+    test_tweet_id := TweetID(21250554358298342)
 
     require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")