REFACTOR: in persistence package, import scraper as '.'
parent 95a52906ba
commit ac763a97a6
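For context (this note is not part of the original commit text): a Go dot-import, written as an import line of the form . "path/to/pkg", splices the imported package's exported identifiers directly into the importing file's scope. That is what lets every scraper.Foo reference in the Go hunks below shrink to a bare Foo. A minimal sketch of the pattern, where Bookmark and the import path are real names from this diff but the describe helper is a hypothetical stand-in:

    package persistence

    import (
        "fmt"

        // Dot-import: every exported name in pkg/scraper is now usable unqualified.
        . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
    )

    // describe is a hypothetical helper; scraper.Bookmark is in scope as just Bookmark.
    func describe(b Bookmark) string {
        return fmt.Sprintf("bookmark: %#v", b)
    }

The usual trade-off: dot-imports blur which identifiers are local and which are imported, so they tend to be reserved for tightly coupled packages, as persistence and scraper are here.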
@@ -1,5 +1,5 @@
 #!/bin/bash

 sudo mount -t tmpfs -o size=100M tmpfs pkg/persistence/test_profiles
-sudo mount -t tmpfs -o size=100M tmpfs cmd/data
+sudo mount -t tmpfs -o size=500M tmpfs cmd/data
 sudo mount -t tmpfs -o size=1000M tmpfs sample_data/profile
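A side note on the script hunk above (the reasoning is not stated in the commit, so this is an inference): tmpfs mounts keep those test and sample-data directories in RAM, making test runs fast and the data disposable; the change itself only grows the cmd/data mount from 100M to 500M. Such mounts disappear on reboot and can be released by hand with sudo umount cmd/data.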
@@ -3,10 +3,10 @@ package persistence
 import (
     "fmt"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

-func (p Profile) SaveBookmark(l scraper.Bookmark) error {
+func (p Profile) SaveBookmark(l Bookmark) error {
     _, err := p.DB.NamedExec(`
         insert into bookmarks (sort_order, user_id, tweet_id)
         values (:sort_order, :user_id, :tweet_id)
@@ -20,7 +20,7 @@ func (p Profile) SaveBookmark(l scraper.Bookmark) error {
     return nil
 }

-func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
+func (p Profile) DeleteBookmark(l Bookmark) error {
     _, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, l)
     if err != nil {
         return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", l, err)
@@ -28,8 +28,8 @@ func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
     return nil
 }

-func (p Profile) GetBookmarkBySortID(id scraper.BookmarkSortID) (scraper.Bookmark, error) {
-    var l scraper.Bookmark
+func (p Profile) GetBookmarkBySortID(id BookmarkSortID) (Bookmark, error) {
+    var l Bookmark
     err := p.DB.Get(&l, `
         select sort_order, user_id, tweet_id
         from bookmarks
@@ -7,7 +7,7 @@ import (
     "strings"
     "time"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 type SortOrder int
@@ -91,7 +91,7 @@ func (o SortOrder) NextCursorValue(r CursorResult) int {
         panic(fmt.Sprintf("Invalid sort order: %d", o))
     }
 }
-func (o SortOrder) NextDMCursorValue(m scraper.DMMessage) int64 {
+func (o SortOrder) NextDMCursorValue(m DMMessage) int64 {
     switch o {
     case SORT_ORDER_NEWEST, SORT_ORDER_OLDEST:
         return m.SentAt.UnixMilli()
@@ -131,12 +131,12 @@ const (
 )

 type CursorResult struct {
-    scraper.Tweet
-    scraper.Retweet
+    Tweet
+    Retweet
     Chrono int `db:"chrono"`
     LikeSortOrder int `db:"likes_sort_order"`
     BookmarkSortOrder int `db:"bookmarks_sort_order"`
-    ByUserID scraper.UserID `db:"by_user_id"`
+    ByUserID UserID `db:"by_user_id"`
 }

 type Cursor struct {
@@ -147,16 +147,16 @@ type Cursor struct {

     // Search params
     Keywords []string
-    FromUserHandle scraper.UserHandle // Tweeted by this user
-    RetweetedByUserHandle scraper.UserHandle // Retweeted by this user
-    ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user
-    ToUserHandles []scraper.UserHandle // In reply to these users
-    LikedByUserHandle scraper.UserHandle // Liked by this user
-    BookmarkedByUserHandle scraper.UserHandle // Bookmarked by this user
-    ListID scraper.ListID // Either tweeted or retweeted by users from this List
-    FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user
-    SinceTimestamp scraper.Timestamp
-    UntilTimestamp scraper.Timestamp
+    FromUserHandle UserHandle // Tweeted by this user
+    RetweetedByUserHandle UserHandle // Retweeted by this user
+    ByUserHandle UserHandle // Either tweeted or retweeted by this user
+    ToUserHandles []UserHandle // In reply to these users
+    LikedByUserHandle UserHandle // Liked by this user
+    BookmarkedByUserHandle UserHandle // Bookmarked by this user
+    ListID ListID // Either tweeted or retweeted by users from this List
+    FollowedByUserHandle UserHandle // Either tweeted or retweeted by users followed by this user
+    SinceTimestamp Timestamp
+    UntilTimestamp Timestamp
     TombstoneType string
     FilterLinks Filter
     FilterImages Filter
@@ -173,9 +173,9 @@ type Cursor struct {
 func NewCursor() Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_NEWEST,
@@ -189,9 +189,9 @@ func NewCursor() Cursor {
 func NewTimelineCursor() Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_NEWEST,
@@ -202,13 +202,13 @@ func NewTimelineCursor() Cursor {
 }

 // Generate a cursor appropriate for showing a List feed
-func NewListCursor(list_id scraper.ListID) Cursor {
+func NewListCursor(list_id ListID) Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
+        ToUserHandles: []UserHandle{},
         ListID: list_id,
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_NEWEST,
@@ -217,12 +217,12 @@ func NewListCursor(list_id scraper.ListID) Cursor {
 }

 // Generate a cursor appropriate for fetching a User Feed
-func NewUserFeedCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_NEWEST,
@@ -233,12 +233,12 @@ func NewUserFeedCursor(h scraper.UserHandle) Cursor {
 }

 // Generate a cursor appropriate for a user's Media tab
-func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedMediaCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_NEWEST,
@@ -250,12 +250,12 @@ func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
 }

 // Generate a cursor for a User's Likes
-func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedLikesCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_LIKED_AT,
@@ -266,12 +266,12 @@ func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
 }

 // Generate a cursor for a User's Bookmarks
-func NewUserFeedBookmarksCursor(h scraper.UserHandle) Cursor {
+func NewUserFeedBookmarksCursor(h UserHandle) Cursor {
     return Cursor{
         Keywords: []string{},
-        ToUserHandles: []scraper.UserHandle{},
-        SinceTimestamp: scraper.TimestampFromUnix(0),
-        UntilTimestamp: scraper.TimestampFromUnix(0),
+        ToUserHandles: []UserHandle{},
+        SinceTimestamp: TimestampFromUnix(0),
+        UntilTimestamp: TimestampFromUnix(0),
         CursorPosition: CURSOR_START,
         CursorValue: 0,
         SortOrder: SORT_ORDER_BOOKMARKED_AT,
@@ -343,24 +343,24 @@ func (c *Cursor) apply_token(token string) error {
     var err error
     switch parts[0] {
     case "from":
-        c.FromUserHandle = scraper.UserHandle(parts[1])
+        c.FromUserHandle = UserHandle(parts[1])
     case "to":
-        c.ToUserHandles = append(c.ToUserHandles, scraper.UserHandle(parts[1]))
+        c.ToUserHandles = append(c.ToUserHandles, UserHandle(parts[1]))
     case "retweeted_by":
-        c.RetweetedByUserHandle = scraper.UserHandle(parts[1])
+        c.RetweetedByUserHandle = UserHandle(parts[1])
         c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
     case "liked_by":
-        c.LikedByUserHandle = scraper.UserHandle(parts[1])
+        c.LikedByUserHandle = UserHandle(parts[1])
     case "bookmarked_by":
-        c.BookmarkedByUserHandle = scraper.UserHandle(parts[1])
+        c.BookmarkedByUserHandle = UserHandle(parts[1])
     case "followed_by":
-        c.FollowedByUserHandle = scraper.UserHandle(parts[1])
+        c.FollowedByUserHandle = UserHandle(parts[1])
     case "list":
         i, err := strconv.Atoi(parts[1])
         if err != nil {
             return fmt.Errorf("%w: filter 'list:' must be a number (list ID), got %q", ErrInvalidQuery, parts[1])
         }
-        c.ListID = scraper.ListID(i)
+        c.ListID = ListID(i)
     case "since":
         c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
     case "until":
@@ -413,7 +413,7 @@ func (c *Cursor) apply_token(token string) error {
     return nil
 }

-func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error) {
+func (p Profile) NextPage(c Cursor, current_user_id UserID) (Feed, error) {
     where_clauses := []string{}
     bind_values := []interface{}{}

@@ -3,10 +3,10 @@ package persistence
 import (
     "fmt"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

-func (p Profile) SaveLike(l scraper.Like) error {
+func (p Profile) SaveLike(l Like) error {
     _, err := p.DB.NamedExec(`
         insert into likes (sort_order, user_id, tweet_id)
         values (:sort_order, :user_id, :tweet_id)
@@ -20,7 +20,7 @@ func (p Profile) SaveLike(l scraper.Like) error {
     return nil
 }

-func (p Profile) DeleteLike(l scraper.Like) error {
+func (p Profile) DeleteLike(l Like) error {
     _, err := p.DB.NamedExec(`delete from likes where user_id = :user_id and tweet_id = :tweet_id`, l)
     if err != nil {
         return fmt.Errorf("Error executing DeleteLike(%#v):\n %w", l, err)
@@ -28,8 +28,8 @@ func (p Profile) DeleteLike(l scraper.Like) error {
     return nil
 }

-func (p Profile) GetLikeBySortID(id scraper.LikeSortID) (scraper.Like, error) {
-    var l scraper.Like
+func (p Profile) GetLikeBySortID(id LikeSortID) (Like, error) {
+    var l Like
     err := p.DB.Get(&l, `
         select sort_order, user_id, tweet_id
         from likes
@@ -6,7 +6,7 @@ import (
     "os"
     "path"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 type MediaDownloader interface {
@@ -14,7 +14,7 @@ type MediaDownloader interface {
 }

 type DefaultDownloader struct {
-    *scraper.API
+    *API
 }

 // Download a file over HTTP and save it.
@@ -47,7 +47,7 @@ func (d DefaultDownloader) Curl(url string, outpath string) error {

 // Downloads an Image, and if successful, marks it as downloaded in the DB
 // DUPE: download-image
-func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownloader) error {
+func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error {
     outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
     err := downloader.Curl(img.RemoteURL, outfile)
     if err != nil {
@@ -59,12 +59,12 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownloader) error {

 // Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
 // DUPE: download-video
-func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
+func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error {
     // Download the video
     outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
     err := downloader.Curl(v.RemoteURL, outfile)

-    if errors.Is(err, scraper.ErrorDMCA) {
+    if errors.Is(err, ErrorDMCA) {
         v.IsDownloaded = false
         v.IsBlockedByDMCA = true
     } else if err != nil {
@@ -86,7 +86,7 @@ func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {

 // Downloads an URL thumbnail image, and if successful, marks it as downloaded in the DB
 // DUPE: download-link-thumbnail
-func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownloader) error {
+func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error {
     if url.HasCard && url.HasThumbnail {
         outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
         err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
@@ -100,12 +100,12 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownloader) error {

 // Download a tweet's video and picture content.
 // Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
-func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error {
+func (p Profile) DownloadTweetContentFor(t *Tweet, api *API) error {
     return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
 }

 // Enable injecting a custom MediaDownloader (i.e., for testing)
-func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader MediaDownloader) error {
+func (p Profile) DownloadTweetContentWithInjector(t *Tweet, downloader MediaDownloader) error {
     // Check if content needs to be downloaded; if not, just return
     if !p.CheckTweetContentDownloadNeeded(*t) {
         return nil
@@ -141,12 +141,12 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader MediaDownloader) error {
 }

 // Download a user's banner and profile images
-func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error {
+func (p Profile) DownloadUserContentFor(u *User, api *API) error {
     return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
 }

 // Enable injecting a custom MediaDownloader (i.e., for testing)
-func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
+func (p Profile) DownloadUserContentWithInjector(u *User, downloader MediaDownloader) error {
     if !p.CheckUserContentDownloadNeeded(*u) {
         return nil
     }
@@ -155,7 +155,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {

     var target_url string
     if u.ProfileImageUrl == "" {
-        target_url = scraper.DEFAULT_PROFILE_IMAGE_URL
+        target_url = DEFAULT_PROFILE_IMAGE_URL
     } else {
         target_url = u.ProfileImageUrl
     }
@@ -170,7 +170,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
         outfile = p.get_banner_image_output_path(*u)
         err = downloader.Curl(u.BannerImageUrl, outfile)

-        if errors.Is(err, scraper.ErrMediaDownload404) {
+        if errors.Is(err, ErrMediaDownload404) {
             // Try adding "600x200". Not sure why this does this but sometimes it does.
             err = downloader.Curl(u.BannerImageUrl+"/600x200", outfile)
         }
@@ -186,7 +186,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
 // Download a User's tiny profile image, if it hasn't been downloaded yet.
 // If it has been downloaded, do nothing.
 // If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
-func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error {
+func (p Profile) DownloadUserProfileImageTiny(u *User, api *API) error {
     if p.IsFollowing(*u) {
         return p.DownloadUserContentFor(u, api)
     }
@@ -6,7 +6,7 @@ import (
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Some types to spy on a MediaDownloader
@@ -38,7 +38,7 @@ func (d FakeDownloader) Contains(result SpyResult) bool {
     return false
 }

-func test_all_downloaded(tweet scraper.Tweet, yes_or_no bool, t *testing.T) {
+func test_all_downloaded(tweet Tweet, yes_or_no bool, t *testing.T) {
     error_msg := map[bool]string{
         true: "Expected to be downloaded, but it wasn't",
         false: "Expected not to be downloaded, but it was",
@@ -147,7 +147,7 @@ func TestDownloadDefaultUserContent(t *testing.T) {
     // Check that the downloader was called with the appropriate stuff
     assert.Len(*fake_downloader.Spy, 1)
     assert.True(fake_downloader.Contains(SpyResult{
-        scraper.DEFAULT_PROFILE_IMAGE_URL,
+        DEFAULT_PROFILE_IMAGE_URL,
         "test_profiles/TestMediaQueries/profile_images/default_profile.png",
     }))
 }
@@ -3,14 +3,14 @@ package persistence
 import (
     "fmt"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Save an Image
 //
 // args:
 // - img: the Image to save
-func (p Profile) SaveImage(img scraper.Image) error {
+func (p Profile) SaveImage(img Image) error {
     _, err := p.DB.NamedExec(`
         insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
         values (:id, :tweet_id, :width, :height, :remote_url, :local_filename, :is_downloaded)
@@ -29,7 +29,7 @@ func (p Profile) SaveImage(img scraper.Image) error {
 //
 // args:
 // - img: the Video to save
-func (p Profile) SaveVideo(vid scraper.Video) error {
+func (p Profile) SaveVideo(vid Video) error {
     _, err := p.DB.NamedExec(`
         insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename,
                             duration, view_count, is_downloaded, is_blocked_by_dmca, is_gif)
@@ -49,7 +49,7 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
 }

 // Save an Url
-func (p Profile) SaveUrl(url scraper.Url) error {
+func (p Profile) SaveUrl(url Url) error {
     _, err := p.DB.NamedExec(`
         insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
                           thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
@@ -68,7 +68,7 @@ func (p Profile) SaveUrl(url scraper.Url) error {
 }

 // Save a Poll
-func (p Profile) SavePoll(poll scraper.Poll) error {
+func (p Profile) SavePoll(poll Poll) error {
     _, err := p.DB.NamedExec(`
         insert into polls (id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4,
                            choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
@@ -90,7 +90,7 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
 }

 // Get the list of images for a tweet
-func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
+func (p Profile) GetImagesForTweet(t Tweet) (imgs []Image, err error) {
     err = p.DB.Select(&imgs,
         "select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?",
         t.ID)
@@ -98,7 +98,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
 }

 // Get the list of videos for a tweet
-func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
+func (p Profile) GetVideosForTweet(t Tweet) (vids []Video, err error) {
     err = p.DB.Select(&vids, `
         select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration,
                view_count, is_downloaded, is_blocked_by_dmca, is_gif
@@ -109,7 +109,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
 }

 // Get the list of Urls for a Tweet
-func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
+func (p Profile) GetUrlsForTweet(t Tweet) (urls []Url, err error) {
     err = p.DB.Select(&urls, `
         select tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
                thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded
@@ -121,7 +121,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
 }

 // Get the list of Polls for a Tweet
-func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
+func (p Profile) GetPollsForTweet(t Tweet) (polls []Poll, err error) {
     err = p.DB.Select(&polls, `
         select id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes,
                voting_duration, voting_ends_at, last_scraped_at
@@ -8,7 +8,7 @@ import (
     "github.com/go-test/deep"
    "github.com/stretchr/testify/require"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Create an Image, save it, reload it, and make sure it comes back the same
@@ -31,7 +31,7 @@ func TestSaveAndLoadImage(t *testing.T) {
     imgs, err := profile.GetImagesForTweet(tweet)
     require.NoError(err)

-    var new_img scraper.Image
+    var new_img Image
     for index := range imgs {
         if imgs[index].ID == img.ID {
             new_img = imgs[index]
@@ -52,7 +52,7 @@ func TestModifyImage(t *testing.T) {
     tweet := create_stable_tweet()
     img := tweet.Images[0]

-    require.Equal(scraper.ImageID(-1), img.ID, "Got the wrong image back")
+    require.Equal(ImageID(-1), img.ID, "Got the wrong image back")

     img.IsDownloaded = true

@@ -94,7 +94,7 @@ func TestSaveAndLoadVideo(t *testing.T) {
     vids, err := profile.GetVideosForTweet(tweet)
     require.NoError(err)

-    var new_vid scraper.Video
+    var new_vid Video
     for index := range vids {
         if vids[index].ID == vid.ID {
             new_vid = vids[index]
@@ -115,7 +115,7 @@ func TestModifyVideo(t *testing.T) {

     tweet := create_stable_tweet()
     vid := tweet.Videos[0]
-    require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back")
+    require.Equal(VideoID(-1), vid.ID, "Got the wrong video back")

     vid.IsDownloaded = true
     vid.IsBlockedByDMCA = true
@@ -157,7 +157,7 @@ func TestSaveAndLoadUrl(t *testing.T) {
     urls, err := profile.GetUrlsForTweet(tweet)
     require.NoError(err)

-    var new_url scraper.Url
+    var new_url Url
     for index := range urls {
         if urls[index].Text == url.Text {
             new_url = urls[index]
@@ -218,7 +218,7 @@ func TestSaveAndLoadPoll(t *testing.T) {
     polls, err := profile.GetPollsForTweet(tweet)
     require.NoError(err)

-    var new_poll scraper.Poll
+    var new_poll Poll
     for index := range polls {
         if polls[index].ID == poll.ID {
             new_poll = polls[index]
@@ -4,6 +4,7 @@ import (
     "database/sql"
     "errors"
     "fmt"

+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

@@ -9,7 +9,7 @@ import (
     sql "github.com/jmoiron/sqlx"
     _ "github.com/mattn/go-sqlite3"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 //go:embed schema.sql
@@ -125,14 +125,14 @@ func LoadProfile(profile_dir string) (Profile, error) {
     return ret, err
 }

-func (p Profile) ListSessions() []scraper.UserHandle {
+func (p Profile) ListSessions() []UserHandle {
     result, err := filepath.Glob(filepath.Join(p.ProfileDir, "*.session"))
     if err != nil {
         panic(err)
     }
-    ret := []scraper.UserHandle{}
+    ret := []UserHandle{}
     for _, filename := range result {
-        ret = append(ret, scraper.UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
+        ret = append(ret, UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
     }
     return ret
 }
@@ -3,11 +3,11 @@ package persistence
 import (
     "fmt"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Save a Retweet. Do nothing if it already exists, because none of its parameters are modifiable.
-func (p Profile) SaveRetweet(r scraper.Retweet) error {
+func (p Profile) SaveRetweet(r Retweet) error {
     _, err := p.DB.NamedExec(`
         insert into retweets (retweet_id, tweet_id, retweeted_by, retweeted_at)
         values (:retweet_id, :tweet_id, :retweeted_by, :retweeted_at)
@@ -22,8 +22,8 @@ func (p Profile) SaveRetweet(r scraper.Retweet) error {
 }

 // Retrieve a Retweet by ID
-func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
-    var r scraper.Retweet
+func (p Profile) GetRetweetById(id TweetID) (Retweet, error) {
+    var r Retweet
     err := p.DB.Get(&r, `
         select retweet_id, tweet_id, retweeted_by, retweeted_at
         from retweets
@@ -2,13 +2,15 @@ package persistence

 import (
     "encoding/json"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"

     "os"

+    log "github.com/sirupsen/logrus"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

-func (p Profile) SaveSession(api scraper.API) {
+func (p Profile) SaveSession(api API) {
     data, err := json.Marshal(api)
     if err != nil {
         panic(err)
@@ -21,13 +23,13 @@ func (p Profile) SaveSession(api scraper.API) {
     }
 }

-func (p Profile) LoadSession(userhandle scraper.UserHandle) scraper.API {
+func (p Profile) LoadSession(userhandle UserHandle) API {
     data, err := os.ReadFile(p.ProfileDir + "/" + string(userhandle+".session"))
     if err != nil {
         panic(err)
     }

-    var result scraper.API
+    var result API
     err = json.Unmarshal(data, &result)
     if err != nil {
         panic(err)
@@ -2,7 +2,7 @@ package persistence_test

 import (
     "fmt"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+
     "math/rand"
     "net/http"
     "net/http/cookiejar"
@@ -10,6 +10,8 @@ import (
     "time"

     "github.com/go-test/deep"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Save and load an API session; it should come back the same
@@ -22,7 +24,7 @@ func TestSaveAndLoadAuthenticatedSession(t *testing.T) {
         panic(err)
     }

-    api := scraper.API{
+    api := API{
         UserHandle: "testUser",
         IsAuthenticated: true,
         Client: http.Client{
@@ -4,16 +4,17 @@ import (
     "database/sql"
     "errors"
     "fmt"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 type SpaceParticipant struct {
-    UserID scraper.UserID `db:"user_id"`
-    SpaceID scraper.SpaceID `db:"space_id"`
+    UserID UserID `db:"user_id"`
+    SpaceID SpaceID `db:"space_id"`
 }

 // Save a Space
-func (p Profile) SaveSpace(s scraper.Space) error {
+func (p Profile) SaveSpace(s Space) error {
     _, err := p.DB.NamedExec(`
         insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
                             is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
@@ -52,7 +53,7 @@ func (p Profile) SaveSpace(s scraper.Space) error {
 }

 // Get a Space by ID
-func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
+func (p Profile) GetSpaceById(id SpaceID) (space Space, err error) {
     err = p.DB.Get(&space,
         `select id, ifnull(created_by_id, 0) created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
                 is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched
@@ -61,7 +62,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
     if err != nil {
         return
     }
-    space.ParticipantIds = []scraper.UserID{}
+    space.ParticipantIds = []UserID{}
     rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
     if errors.Is(err, sql.ErrNoRows) {
         err = nil
@@ -70,7 +71,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
     if err != nil {
         panic(err)
     }
-    var participant_id scraper.UserID
+    var participant_id UserID
     for rows.Next() {
         err = rows.Scan(&participant_id)
         if err != nil {
@@ -5,10 +5,11 @@ import (

     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"

     "github.com/go-test/deep"
     "math/rand"
+
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Create a Space, save it, reload it, and make sure it comes back the same
@@ -41,8 +42,8 @@ func TestModifySpace(t *testing.T) {

     // Modify and save
     space.State = "Some other state"
-    space.UpdatedAt = scraper.TimestampFromUnix(9001)
-    space.EndedAt = scraper.TimestampFromUnix(10001)
+    space.UpdatedAt = TimestampFromUnix(9001)
+    space.EndedAt = TimestampFromUnix(10001)
     space.ReplayWatchCount = 100
     space.LiveListenersCount = 50
     space.IsDetailsFetched = true
@@ -51,8 +52,8 @@ func TestModifySpace(t *testing.T) {

     new_space, err := profile.GetSpaceById(space.ID)
     require.NoError(err)
-    assert.Equal(scraper.TimestampFromUnix(9001), new_space.UpdatedAt)
-    assert.Equal(scraper.TimestampFromUnix(10001), new_space.EndedAt)
+    assert.Equal(TimestampFromUnix(9001), new_space.UpdatedAt)
+    assert.Equal(TimestampFromUnix(10001), new_space.EndedAt)
     assert.Equal(100, new_space.ReplayWatchCount)
     assert.Equal(50, new_space.LiveListenersCount)
     assert.True(new_space.IsDetailsFetched)
@@ -68,9 +69,9 @@ func TestNoWorseningSpace(t *testing.T) {
     space.ShortUrl = "Some Short Url"
     space.State = "Some State"
     space.Title = "Debating Somebody"
-    space.CreatedAt = scraper.TimestampFromUnix(1000)
-    space.UpdatedAt = scraper.TimestampFromUnix(2000)
-    space.CreatedById = scraper.UserID(-1)
+    space.CreatedAt = TimestampFromUnix(1000)
+    space.UpdatedAt = TimestampFromUnix(2000)
+    space.CreatedById = UserID(-1)
     space.LiveListenersCount = 100
     space.IsDetailsFetched = true

@@ -82,9 +83,9 @@ func TestNoWorseningSpace(t *testing.T) {
     space.ShortUrl = ""
     space.Title = ""
     space.State = ""
-    space.CreatedAt = scraper.TimestampFromUnix(0)
-    space.UpdatedAt = scraper.TimestampFromUnix(0)
-    space.CreatedById = scraper.UserID(0)
+    space.CreatedAt = TimestampFromUnix(0)
+    space.UpdatedAt = TimestampFromUnix(0)
+    space.CreatedById = UserID(0)
     space.LiveListenersCount = 0
     space.IsDetailsFetched = false
     err = profile.SaveSpace(space)
@@ -97,9 +98,9 @@ func TestNoWorseningSpace(t *testing.T) {
     assert.Equal(new_space.ShortUrl, "Some Short Url")
     assert.Equal(new_space.State, "Some State")
     assert.Equal(new_space.Title, "Debating Somebody")
-    assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000))
-    assert.Equal(new_space.UpdatedAt, scraper.TimestampFromUnix(2000))
-    assert.Equal(new_space.CreatedById, scraper.UserID(-1))
+    assert.Equal(new_space.CreatedAt, TimestampFromUnix(1000))
+    assert.Equal(new_space.UpdatedAt, TimestampFromUnix(2000))
+    assert.Equal(new_space.CreatedById, UserID(-1))
     assert.Equal(new_space.LiveListenersCount, 100)
     assert.True(new_space.IsDetailsFetched)
 }
@@ -9,7 +9,7 @@ import (
     "github.com/go-test/deep"

     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Create a Tweet, save it, reload it, and make sure it comes back the same
@@ -71,7 +71,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsStub = false
     tweet.IsConversationScraped = true
     tweet.IsExpandable = true
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
+    tweet.LastScrapedAt = TimestampFromUnix(1000)
     tweet.Text = "Yes text"
     tweet.NumLikes = 10
     tweet.NumRetweets = 11
@@ -87,7 +87,7 @@ func TestNoWorseningTweet(t *testing.T) {
     tweet.IsStub = true
     tweet.IsConversationScraped = false
     tweet.IsExpandable = false
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
+    tweet.LastScrapedAt = TimestampFromUnix(500)
     tweet.Text = ""
     err = profile.SaveTweet(tweet)
     require.NoError(err)
@@ -231,7 +231,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = true
     tweet.IsContentDownloaded = false
     tweet.IsConversationScraped = false
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
+    tweet.LastScrapedAt = TimestampFromUnix(1000)

     err := profile.SaveTweet(tweet)
     require.NoError(err)
@@ -243,7 +243,7 @@ func TestModifyTweet(t *testing.T) {
     tweet.IsStub = false
     tweet.IsContentDownloaded = true
     tweet.IsConversationScraped = true
-    tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
+    tweet.LastScrapedAt = TimestampFromUnix(2000)
     tweet.TombstoneType = "deleted"

     err = profile.SaveTweet(tweet)
@@ -332,7 +332,7 @@ func TestLoadMissingTweet(t *testing.T) {
     profile_path := "test_profiles/TestTweetQueries"
     profile := create_or_load_profile(profile_path)

-    _, err := profile.GetTweetById(scraper.TweetID(6234234)) // Random number
+    _, err := profile.GetTweetById(TweetID(6234234)) // Random number
     require.Error(t, err)
     assert.ErrorIs(t, err, persistence.ErrNotInDatabase)
 }
@@ -5,6 +5,8 @@ import (
     "fmt"
     "path"

+    log "github.com/sirupsen/logrus"
+
     . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

@@ -18,7 +20,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
     // who were marked as deleted, and then let the callee re-scrape and re-save them.
     var conflict_err ErrConflictingUserHandle
     if errors.As(err, &conflict_err) {
-        fmt.Printf(
+        log.Warnf(
             "Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
             conflict_err.ConflictingUserID,
         )
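An aside on the SaveTweetTrove hunk above: replacing fmt.Printf with logrus's log.Warnf routes the conflicting-handle notice through leveled, timestamped logging instead of bare stdout. A minimal sketch of the difference, assuming only logrus's standard package-level API:

    package main

    import (
        "fmt"

        log "github.com/sirupsen/logrus"
    )

    func main() {
        // Plain stdout: no level, no timestamp.
        fmt.Printf("Conflicting user handle found (ID %d)\n", 123)
        // logrus WARN entry: leveled and timestamped by the default formatter.
        log.Warnf("Conflicting user handle found (ID %d)", 123)
    }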
@@ -9,11 +9,11 @@ import (
     "github.com/jmoiron/sqlx"
     "github.com/mattn/go-sqlite3"

-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 type ErrConflictingUserHandle struct {
-    ConflictingUserID scraper.UserID
+    ConflictingUserID UserID
 }

 func (e ErrConflictingUserHandle) Error() string {
@@ -46,7 +46,7 @@ const USERS_ALL_SQL_FIELDS = `
 // 3. Mark the old user as deactivated, eliminating the conflict
 // 4. Re-save the new user
 // 5. Return an ErrConflictingUserHandle, notifying the caller of the conflict
-func (p Profile) SaveUser(u *scraper.User) error {
+func (p Profile) SaveUser(u *User) error {
     // First, check if the user needs a fake ID, and generate one if needed
     if u.IsNeedingFakeID {
         // User is fake; check if we already have them, in order to proceed
@@ -70,7 +70,7 @@ func (p Profile) SaveUser(u *scraper.User) error {
         // We know the UNIQUE violation must be on `handle`, because we checked for users with this ID
         // above (`update` query).
         handle_conflict := func() error {
-            var old_user scraper.User
+            var old_user User
             err := p.DB.Get(&old_user,
                 `select id, is_id_fake from users where handle = ? and is_banned = 0 and is_deleted = 0`,
                 u.Handle,
@@ -189,10 +189,10 @@ func (p Profile) SaveUser(u *scraper.User) error {
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error) {
+func (p Profile) GetUserByHandle(handle UserHandle) (User, error) {
     db := p.DB

-    var ret scraper.User
+    var ret User
     err := db.Get(&ret, `
         select `+USERS_ALL_SQL_FIELDS+`
         from users_by_handle
@@ -212,10 +212,10 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error) {
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
+func (p Profile) GetUserByID(id UserID) (User, error) {
     db := p.DB

-    var ret scraper.User
+    var ret User

     err := db.Get(&ret, `
         select `+USERS_ALL_SQL_FIELDS+`
@@ -243,7 +243,7 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
 //
 // The `user` object will always have `is_content_downloaded` = false on every scrape. This is
 // why the No Worsening Principle is needed.
-func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
+func (p Profile) CheckUserContentDownloadNeeded(user User) bool {
     row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)

     var is_content_downloaded bool
@@ -271,7 +271,7 @@ func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
 }

 // Follow / unfollow a user. Update the given User object's IsFollowed field.
-func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
+func (p Profile) SetUserFollowed(user *User, is_followed bool) {
     result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
     if err != nil {
         panic(fmt.Errorf("Error inserting user with handle %q:\n %w", user.Handle, err))
@@ -286,12 +286,12 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
     user.IsFollowed = is_followed
 }

-func (p Profile) NextFakeUserID() scraper.UserID {
+func (p Profile) NextFakeUserID() UserID {
     _, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
     if err != nil {
         panic(err)
     }
-    var ret scraper.UserID
+    var ret UserID
     err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
     if err != nil {
         panic(err)
@@ -301,7 +301,7 @@ func (p Profile) NextFakeUserID() scraper.UserID {

 // TODO: This is only used in checking whether the media downloader should get the big or small version of
 // a profile image. That should be rewritten
-func (p Profile) IsFollowing(user scraper.User) bool {
+func (p Profile) IsFollowing(user User) bool {
     row := p.DB.QueryRow("select is_followed from users where id like ?", user.ID)
     var ret bool
     err := row.Scan(&ret)
@@ -315,21 +315,21 @@ func (p Profile) IsFollowing(user scraper.User) bool {
 }

 // Utility function to compute the path to save banner image to
-func (p Profile) get_banner_image_output_path(u scraper.User) string {
+func (p Profile) get_banner_image_output_path(u User) string {
     return path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath)
 }

 // Utility function to compute the path to save profile image to
-func (p Profile) get_profile_image_output_path(u scraper.User) string {
+func (p Profile) get_profile_image_output_path(u User) string {
     if u.ProfileImageUrl == "" {
-        return path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL))
+        return path.Join(p.ProfileDir, "profile_images", path.Base(DEFAULT_PROFILE_IMAGE_URL))
     }
     return path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
 }

 // Do a text search for users
-func (p Profile) SearchUsers(s string) []scraper.User {
-    var ret []scraper.User
+func (p Profile) SearchUsers(s string) []User {
+    var ret []User
     q, args, err := sqlx.Named(`
         select `+USERS_ALL_SQL_FIELDS+`
         from users
@@ -12,7 +12,7 @@ import (
     "github.com/stretchr/testify/require"

     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Create a user, save it, reload it, and make sure it comes back the same
@@ -53,19 +53,19 @@ func TestModifyUser(t *testing.T) {
     user := create_dummy_user()
     user.DisplayName = "Display Name 1"
     user.Location = "location1"
-    user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
     user.IsPrivate = false
     user.IsVerified = false
     user.FollowersCount = 1000
-    user.JoinDate = scraper.TimestampFromUnix(1000)
+    user.JoinDate = TimestampFromUnix(1000)
     user.ProfileImageUrl = "asdf"
     user.IsContentDownloaded = true

-    // Save the user so it can be modified
+    // Save the user for the first time; should do insert
     err := profile.SaveUser(&user)
     require.NoError(err)

-    new_handle := scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    new_handle := UserHandle(fmt.Sprintf("handle %d", rand.Int31()))

     user.DisplayName = "Display Name 2"
     user.Location = "location2"
@@ -73,11 +73,11 @@ func TestModifyUser(t *testing.T) {
     user.IsPrivate = true
     user.IsVerified = true
     user.FollowersCount = 2000
-    user.JoinDate = scraper.TimestampFromUnix(2000)
+    user.JoinDate = TimestampFromUnix(2000)
     user.ProfileImageUrl = "asdf2"
     user.IsContentDownloaded = false // test No Worsening

-    // Save the modified user
+    // Save the user for the second time; should do update
     err = profile.SaveUser(&user)
     require.NoError(err)

@@ -107,9 +107,9 @@ func TestSetUserBannedDeleted(t *testing.T) {
     user.DisplayName = "Display Name 1"
     user.Location = "location1"
     user.Bio = "Some Bio"
-    user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+    user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
     user.FollowersCount = 1000
-    user.JoinDate = scraper.TimestampFromUnix(1000)
+    user.JoinDate = TimestampFromUnix(1000)
     user.ProfileImageUrl = "asdf"
     user.IsContentDownloaded = true

@@ -118,7 +118,7 @@ func TestSetUserBannedDeleted(t *testing.T) {
     require.NoError(err)

     // Now the user deactivates
-    err = profile.SaveUser(&scraper.User{ID: user.ID, IsDeleted: true})
+    err = profile.SaveUser(&User{ID: user.ID, IsDeleted: true})
     require.NoError(err)
     // Reload the modified user
     new_user, err := profile.GetUserByID(user.ID)
@@ -141,9 +141,9 @@ func TestSaveAndLoadBannedDeletedUser(t *testing.T) {
     profile_path := "test_profiles/TestUserQueries"
     profile := create_or_load_profile(profile_path)

-    user := scraper.User{
-        ID: scraper.UserID(rand.Int31()),
-        Handle: scraper.UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
+    user := User{
+        ID: UserID(rand.Int31()),
+        Handle: UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
         IsBanned: true,
     }

@@ -365,20 +365,20 @@ func TestCreateUnknownUserWithHandle(t *testing.T) {

     next_id := profile.NextFakeUserID()

-    handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
-    user := scraper.GetUnknownUserWithHandle(handle)
-    assert.Equal(scraper.UserID(0), user.ID)
+    handle := UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
+    user := GetUnknownUserWithHandle(handle)
+    assert.Equal(UserID(0), user.ID)
     assert.True(user.IsIdFake)

     err := profile.SaveUser(&user)
     assert.NoError(err)
-    assert.Equal(scraper.UserID(next_id+1), user.ID)
+    assert.Equal(UserID(next_id+1), user.ID)

     // Ensure the change was persisted
     user_reloaded, err := profile.GetUserByHandle(user.Handle)
     require.NoError(t, err)
     assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
-    assert.Equal(scraper.UserID(next_id+1), user_reloaded.ID)
+    assert.Equal(UserID(next_id+1), user_reloaded.ID)

     // Why not tack this test on here: make sure NextFakeUserID works as expected
     assert.Equal(next_id+2, profile.NextFakeUserID())
@@ -393,8 +393,8 @@ func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {

     user := create_stable_user()

-    unknown_user := scraper.GetUnknownUserWithHandle(user.Handle)
-    assert.Equal(scraper.UserID(0), unknown_user.ID)
+    unknown_user := GetUnknownUserWithHandle(user.Handle)
+    assert.Equal(UserID(0), unknown_user.ID)

     err := profile.SaveUser(&unknown_user)
     assert.NoError(err)
@@ -417,6 +417,6 @@ func TestSearchUsers(t *testing.T) {

     users := profile.SearchUsers("no")
     assert.Len(users, 2)
-    assert.Equal(users[0].Handle, scraper.UserHandle("Cernovich"))
-    assert.Equal(users[1].Handle, scraper.UserHandle("CovfefeAnon"))
+    assert.Equal(users[0].Handle, UserHandle("Cernovich"))
+    assert.Equal(users[1].Handle, UserHandle("CovfefeAnon"))
 }
@@ -8,7 +8,7 @@ import (
     "github.com/stretchr/testify/require"

     "gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    . "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 func TestVersionUpgrade(t *testing.T) {
@@ -21,7 +21,7 @@ func TestVersionUpgrade(t *testing.T) {
     profile := create_or_load_profile(profile_path)

     test_migration := "insert into tweets (id, user_id, text) values (21250554358298342, -1, 'awefjk')"
-    test_tweet_id := scraper.TweetID(21250554358298342)
+    test_tweet_id := TweetID(21250554358298342)

     require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
