REFACTOR: in persistence package, import scraper as '.'
This commit is contained in:
parent
95a52906ba
commit
ac763a97a6
@ -1,5 +1,5 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
sudo mount -t tmpfs -o size=100M tmpfs pkg/persistence/test_profiles
|
sudo mount -t tmpfs -o size=100M tmpfs pkg/persistence/test_profiles
|
||||||
sudo mount -t tmpfs -o size=100M tmpfs cmd/data
|
sudo mount -t tmpfs -o size=500M tmpfs cmd/data
|
||||||
sudo mount -t tmpfs -o size=1000M tmpfs sample_data/profile
|
sudo mount -t tmpfs -o size=1000M tmpfs sample_data/profile
|
||||||
|
@ -3,10 +3,10 @@ package persistence
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (p Profile) SaveBookmark(l scraper.Bookmark) error {
|
func (p Profile) SaveBookmark(l Bookmark) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into bookmarks (sort_order, user_id, tweet_id)
|
insert into bookmarks (sort_order, user_id, tweet_id)
|
||||||
values (:sort_order, :user_id, :tweet_id)
|
values (:sort_order, :user_id, :tweet_id)
|
||||||
@ -20,7 +20,7 @@ func (p Profile) SaveBookmark(l scraper.Bookmark) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
|
func (p Profile) DeleteBookmark(l Bookmark) error {
|
||||||
_, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, l)
|
_, err := p.DB.NamedExec(`delete from bookmarks where user_id = :user_id and tweet_id = :tweet_id`, l)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", l, err)
|
return fmt.Errorf("Error executing DeleteBookmark(%#v):\n %w", l, err)
|
||||||
@ -28,8 +28,8 @@ func (p Profile) DeleteBookmark(l scraper.Bookmark) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) GetBookmarkBySortID(id scraper.BookmarkSortID) (scraper.Bookmark, error) {
|
func (p Profile) GetBookmarkBySortID(id BookmarkSortID) (Bookmark, error) {
|
||||||
var l scraper.Bookmark
|
var l Bookmark
|
||||||
err := p.DB.Get(&l, `
|
err := p.DB.Get(&l, `
|
||||||
select sort_order, user_id, tweet_id
|
select sort_order, user_id, tweet_id
|
||||||
from bookmarks
|
from bookmarks
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SortOrder int
|
type SortOrder int
|
||||||
@ -91,7 +91,7 @@ func (o SortOrder) NextCursorValue(r CursorResult) int {
|
|||||||
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
panic(fmt.Sprintf("Invalid sort order: %d", o))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func (o SortOrder) NextDMCursorValue(m scraper.DMMessage) int64 {
|
func (o SortOrder) NextDMCursorValue(m DMMessage) int64 {
|
||||||
switch o {
|
switch o {
|
||||||
case SORT_ORDER_NEWEST, SORT_ORDER_OLDEST:
|
case SORT_ORDER_NEWEST, SORT_ORDER_OLDEST:
|
||||||
return m.SentAt.UnixMilli()
|
return m.SentAt.UnixMilli()
|
||||||
@ -131,12 +131,12 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type CursorResult struct {
|
type CursorResult struct {
|
||||||
scraper.Tweet
|
Tweet
|
||||||
scraper.Retweet
|
Retweet
|
||||||
Chrono int `db:"chrono"`
|
Chrono int `db:"chrono"`
|
||||||
LikeSortOrder int `db:"likes_sort_order"`
|
LikeSortOrder int `db:"likes_sort_order"`
|
||||||
BookmarkSortOrder int `db:"bookmarks_sort_order"`
|
BookmarkSortOrder int `db:"bookmarks_sort_order"`
|
||||||
ByUserID scraper.UserID `db:"by_user_id"`
|
ByUserID UserID `db:"by_user_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Cursor struct {
|
type Cursor struct {
|
||||||
@ -147,16 +147,16 @@ type Cursor struct {
|
|||||||
|
|
||||||
// Search params
|
// Search params
|
||||||
Keywords []string
|
Keywords []string
|
||||||
FromUserHandle scraper.UserHandle // Tweeted by this user
|
FromUserHandle UserHandle // Tweeted by this user
|
||||||
RetweetedByUserHandle scraper.UserHandle // Retweeted by this user
|
RetweetedByUserHandle UserHandle // Retweeted by this user
|
||||||
ByUserHandle scraper.UserHandle // Either tweeted or retweeted by this user
|
ByUserHandle UserHandle // Either tweeted or retweeted by this user
|
||||||
ToUserHandles []scraper.UserHandle // In reply to these users
|
ToUserHandles []UserHandle // In reply to these users
|
||||||
LikedByUserHandle scraper.UserHandle // Liked by this user
|
LikedByUserHandle UserHandle // Liked by this user
|
||||||
BookmarkedByUserHandle scraper.UserHandle // Bookmarked by this user
|
BookmarkedByUserHandle UserHandle // Bookmarked by this user
|
||||||
ListID scraper.ListID // Either tweeted or retweeted by users from this List
|
ListID ListID // Either tweeted or retweeted by users from this List
|
||||||
FollowedByUserHandle scraper.UserHandle // Either tweeted or retweeted by users followed by this user
|
FollowedByUserHandle UserHandle // Either tweeted or retweeted by users followed by this user
|
||||||
SinceTimestamp scraper.Timestamp
|
SinceTimestamp Timestamp
|
||||||
UntilTimestamp scraper.Timestamp
|
UntilTimestamp Timestamp
|
||||||
TombstoneType string
|
TombstoneType string
|
||||||
FilterLinks Filter
|
FilterLinks Filter
|
||||||
FilterImages Filter
|
FilterImages Filter
|
||||||
@ -173,9 +173,9 @@ type Cursor struct {
|
|||||||
func NewCursor() Cursor {
|
func NewCursor() Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_NEWEST,
|
SortOrder: SORT_ORDER_NEWEST,
|
||||||
@ -189,9 +189,9 @@ func NewCursor() Cursor {
|
|||||||
func NewTimelineCursor() Cursor {
|
func NewTimelineCursor() Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_NEWEST,
|
SortOrder: SORT_ORDER_NEWEST,
|
||||||
@ -202,13 +202,13 @@ func NewTimelineCursor() Cursor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate a cursor appropriate for showing a List feed
|
// Generate a cursor appropriate for showing a List feed
|
||||||
func NewListCursor(list_id scraper.ListID) Cursor {
|
func NewListCursor(list_id ListID) Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
ListID: list_id,
|
ListID: list_id,
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_NEWEST,
|
SortOrder: SORT_ORDER_NEWEST,
|
||||||
@ -217,12 +217,12 @@ func NewListCursor(list_id scraper.ListID) Cursor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate a cursor appropriate for fetching a User Feed
|
// Generate a cursor appropriate for fetching a User Feed
|
||||||
func NewUserFeedCursor(h scraper.UserHandle) Cursor {
|
func NewUserFeedCursor(h UserHandle) Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_NEWEST,
|
SortOrder: SORT_ORDER_NEWEST,
|
||||||
@ -233,12 +233,12 @@ func NewUserFeedCursor(h scraper.UserHandle) Cursor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate a cursor appropriate for a user's Media tab
|
// Generate a cursor appropriate for a user's Media tab
|
||||||
func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
|
func NewUserFeedMediaCursor(h UserHandle) Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_NEWEST,
|
SortOrder: SORT_ORDER_NEWEST,
|
||||||
@ -250,12 +250,12 @@ func NewUserFeedMediaCursor(h scraper.UserHandle) Cursor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate a cursor for a User's Likes
|
// Generate a cursor for a User's Likes
|
||||||
func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
|
func NewUserFeedLikesCursor(h UserHandle) Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_LIKED_AT,
|
SortOrder: SORT_ORDER_LIKED_AT,
|
||||||
@ -266,12 +266,12 @@ func NewUserFeedLikesCursor(h scraper.UserHandle) Cursor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate a cursor for a User's Bookmarks
|
// Generate a cursor for a User's Bookmarks
|
||||||
func NewUserFeedBookmarksCursor(h scraper.UserHandle) Cursor {
|
func NewUserFeedBookmarksCursor(h UserHandle) Cursor {
|
||||||
return Cursor{
|
return Cursor{
|
||||||
Keywords: []string{},
|
Keywords: []string{},
|
||||||
ToUserHandles: []scraper.UserHandle{},
|
ToUserHandles: []UserHandle{},
|
||||||
SinceTimestamp: scraper.TimestampFromUnix(0),
|
SinceTimestamp: TimestampFromUnix(0),
|
||||||
UntilTimestamp: scraper.TimestampFromUnix(0),
|
UntilTimestamp: TimestampFromUnix(0),
|
||||||
CursorPosition: CURSOR_START,
|
CursorPosition: CURSOR_START,
|
||||||
CursorValue: 0,
|
CursorValue: 0,
|
||||||
SortOrder: SORT_ORDER_BOOKMARKED_AT,
|
SortOrder: SORT_ORDER_BOOKMARKED_AT,
|
||||||
@ -343,24 +343,24 @@ func (c *Cursor) apply_token(token string) error {
|
|||||||
var err error
|
var err error
|
||||||
switch parts[0] {
|
switch parts[0] {
|
||||||
case "from":
|
case "from":
|
||||||
c.FromUserHandle = scraper.UserHandle(parts[1])
|
c.FromUserHandle = UserHandle(parts[1])
|
||||||
case "to":
|
case "to":
|
||||||
c.ToUserHandles = append(c.ToUserHandles, scraper.UserHandle(parts[1]))
|
c.ToUserHandles = append(c.ToUserHandles, UserHandle(parts[1]))
|
||||||
case "retweeted_by":
|
case "retweeted_by":
|
||||||
c.RetweetedByUserHandle = scraper.UserHandle(parts[1])
|
c.RetweetedByUserHandle = UserHandle(parts[1])
|
||||||
c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
|
c.FilterRetweets = NONE // Clear the "exclude retweets" filter set by default in NewCursor
|
||||||
case "liked_by":
|
case "liked_by":
|
||||||
c.LikedByUserHandle = scraper.UserHandle(parts[1])
|
c.LikedByUserHandle = UserHandle(parts[1])
|
||||||
case "bookmarked_by":
|
case "bookmarked_by":
|
||||||
c.BookmarkedByUserHandle = scraper.UserHandle(parts[1])
|
c.BookmarkedByUserHandle = UserHandle(parts[1])
|
||||||
case "followed_by":
|
case "followed_by":
|
||||||
c.FollowedByUserHandle = scraper.UserHandle(parts[1])
|
c.FollowedByUserHandle = UserHandle(parts[1])
|
||||||
case "list":
|
case "list":
|
||||||
i, err := strconv.Atoi(parts[1])
|
i, err := strconv.Atoi(parts[1])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("%w: filter 'list:' must be a number (list ID), got %q", ErrInvalidQuery, parts[1])
|
return fmt.Errorf("%w: filter 'list:' must be a number (list ID), got %q", ErrInvalidQuery, parts[1])
|
||||||
}
|
}
|
||||||
c.ListID = scraper.ListID(i)
|
c.ListID = ListID(i)
|
||||||
case "since":
|
case "since":
|
||||||
c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
c.SinceTimestamp.Time, err = time.Parse("2006-01-02", parts[1])
|
||||||
case "until":
|
case "until":
|
||||||
@ -413,7 +413,7 @@ func (c *Cursor) apply_token(token string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error) {
|
func (p Profile) NextPage(c Cursor, current_user_id UserID) (Feed, error) {
|
||||||
where_clauses := []string{}
|
where_clauses := []string{}
|
||||||
bind_values := []interface{}{}
|
bind_values := []interface{}{}
|
||||||
|
|
||||||
|
@ -3,10 +3,10 @@ package persistence
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (p Profile) SaveLike(l scraper.Like) error {
|
func (p Profile) SaveLike(l Like) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into likes (sort_order, user_id, tweet_id)
|
insert into likes (sort_order, user_id, tweet_id)
|
||||||
values (:sort_order, :user_id, :tweet_id)
|
values (:sort_order, :user_id, :tweet_id)
|
||||||
@ -20,7 +20,7 @@ func (p Profile) SaveLike(l scraper.Like) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) DeleteLike(l scraper.Like) error {
|
func (p Profile) DeleteLike(l Like) error {
|
||||||
_, err := p.DB.NamedExec(`delete from likes where user_id = :user_id and tweet_id = :tweet_id`, l)
|
_, err := p.DB.NamedExec(`delete from likes where user_id = :user_id and tweet_id = :tweet_id`, l)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error executing DeleteLike(%#v):\n %w", l, err)
|
return fmt.Errorf("Error executing DeleteLike(%#v):\n %w", l, err)
|
||||||
@ -28,8 +28,8 @@ func (p Profile) DeleteLike(l scraper.Like) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) GetLikeBySortID(id scraper.LikeSortID) (scraper.Like, error) {
|
func (p Profile) GetLikeBySortID(id LikeSortID) (Like, error) {
|
||||||
var l scraper.Like
|
var l Like
|
||||||
err := p.DB.Get(&l, `
|
err := p.DB.Get(&l, `
|
||||||
select sort_order, user_id, tweet_id
|
select sort_order, user_id, tweet_id
|
||||||
from likes
|
from likes
|
||||||
|
@ -6,7 +6,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MediaDownloader interface {
|
type MediaDownloader interface {
|
||||||
@ -14,7 +14,7 @@ type MediaDownloader interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type DefaultDownloader struct {
|
type DefaultDownloader struct {
|
||||||
*scraper.API
|
*API
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download a file over HTTP and save it.
|
// Download a file over HTTP and save it.
|
||||||
@ -47,7 +47,7 @@ func (d DefaultDownloader) Curl(url string, outpath string) error {
|
|||||||
|
|
||||||
// Downloads an Image, and if successful, marks it as downloaded in the DB
|
// Downloads an Image, and if successful, marks it as downloaded in the DB
|
||||||
// DUPE: download-image
|
// DUPE: download-image
|
||||||
func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownloader) error {
|
func (p Profile) download_tweet_image(img *Image, downloader MediaDownloader) error {
|
||||||
outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
|
outfile := path.Join(p.ProfileDir, "images", img.LocalFilename)
|
||||||
err := downloader.Curl(img.RemoteURL, outfile)
|
err := downloader.Curl(img.RemoteURL, outfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -59,12 +59,12 @@ func (p Profile) download_tweet_image(img *scraper.Image, downloader MediaDownlo
|
|||||||
|
|
||||||
// Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
|
// Downloads a Video and its thumbnail, and if successful, marks it as downloaded in the DB
|
||||||
// DUPE: download-video
|
// DUPE: download-video
|
||||||
func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownloader) error {
|
func (p Profile) download_tweet_video(v *Video, downloader MediaDownloader) error {
|
||||||
// Download the video
|
// Download the video
|
||||||
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
outfile := path.Join(p.ProfileDir, "videos", v.LocalFilename)
|
||||||
err := downloader.Curl(v.RemoteURL, outfile)
|
err := downloader.Curl(v.RemoteURL, outfile)
|
||||||
|
|
||||||
if errors.Is(err, scraper.ErrorDMCA) {
|
if errors.Is(err, ErrorDMCA) {
|
||||||
v.IsDownloaded = false
|
v.IsDownloaded = false
|
||||||
v.IsBlockedByDMCA = true
|
v.IsBlockedByDMCA = true
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
@ -86,7 +86,7 @@ func (p Profile) download_tweet_video(v *scraper.Video, downloader MediaDownload
|
|||||||
|
|
||||||
// Downloads an URL thumbnail image, and if successful, marks it as downloaded in the DB
|
// Downloads an URL thumbnail image, and if successful, marks it as downloaded in the DB
|
||||||
// DUPE: download-link-thumbnail
|
// DUPE: download-link-thumbnail
|
||||||
func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownloader) error {
|
func (p Profile) download_link_thumbnail(url *Url, downloader MediaDownloader) error {
|
||||||
if url.HasCard && url.HasThumbnail {
|
if url.HasCard && url.HasThumbnail {
|
||||||
outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
|
outfile := path.Join(p.ProfileDir, "link_preview_images", url.ThumbnailLocalPath)
|
||||||
err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
|
err := downloader.Curl(url.ThumbnailRemoteUrl, outfile)
|
||||||
@ -100,12 +100,12 @@ func (p Profile) download_link_thumbnail(url *scraper.Url, downloader MediaDownl
|
|||||||
|
|
||||||
// Download a tweet's video and picture content.
|
// Download a tweet's video and picture content.
|
||||||
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
|
// Wraps the `DownloadTweetContentWithInjector` method with the default (i.e., real) downloader.
|
||||||
func (p Profile) DownloadTweetContentFor(t *scraper.Tweet, api *scraper.API) error {
|
func (p Profile) DownloadTweetContentFor(t *Tweet, api *API) error {
|
||||||
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
|
return p.DownloadTweetContentWithInjector(t, DefaultDownloader{API: api})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||||
func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader MediaDownloader) error {
|
func (p Profile) DownloadTweetContentWithInjector(t *Tweet, downloader MediaDownloader) error {
|
||||||
// Check if content needs to be downloaded; if not, just return
|
// Check if content needs to be downloaded; if not, just return
|
||||||
if !p.CheckTweetContentDownloadNeeded(*t) {
|
if !p.CheckTweetContentDownloadNeeded(*t) {
|
||||||
return nil
|
return nil
|
||||||
@ -141,12 +141,12 @@ func (p Profile) DownloadTweetContentWithInjector(t *scraper.Tweet, downloader M
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Download a user's banner and profile images
|
// Download a user's banner and profile images
|
||||||
func (p Profile) DownloadUserContentFor(u *scraper.User, api *scraper.API) error {
|
func (p Profile) DownloadUserContentFor(u *User, api *API) error {
|
||||||
return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
|
return p.DownloadUserContentWithInjector(u, DefaultDownloader{API: api})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
// Enable injecting a custom MediaDownloader (i.e., for testing)
|
||||||
func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader MediaDownloader) error {
|
func (p Profile) DownloadUserContentWithInjector(u *User, downloader MediaDownloader) error {
|
||||||
if !p.CheckUserContentDownloadNeeded(*u) {
|
if !p.CheckUserContentDownloadNeeded(*u) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -155,7 +155,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
|||||||
|
|
||||||
var target_url string
|
var target_url string
|
||||||
if u.ProfileImageUrl == "" {
|
if u.ProfileImageUrl == "" {
|
||||||
target_url = scraper.DEFAULT_PROFILE_IMAGE_URL
|
target_url = DEFAULT_PROFILE_IMAGE_URL
|
||||||
} else {
|
} else {
|
||||||
target_url = u.ProfileImageUrl
|
target_url = u.ProfileImageUrl
|
||||||
}
|
}
|
||||||
@ -170,7 +170,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
|||||||
outfile = p.get_banner_image_output_path(*u)
|
outfile = p.get_banner_image_output_path(*u)
|
||||||
err = downloader.Curl(u.BannerImageUrl, outfile)
|
err = downloader.Curl(u.BannerImageUrl, outfile)
|
||||||
|
|
||||||
if errors.Is(err, scraper.ErrMediaDownload404) {
|
if errors.Is(err, ErrMediaDownload404) {
|
||||||
// Try adding "600x200". Not sure why this does this but sometimes it does.
|
// Try adding "600x200". Not sure why this does this but sometimes it does.
|
||||||
err = downloader.Curl(u.BannerImageUrl+"/600x200", outfile)
|
err = downloader.Curl(u.BannerImageUrl+"/600x200", outfile)
|
||||||
}
|
}
|
||||||
@ -186,7 +186,7 @@ func (p Profile) DownloadUserContentWithInjector(u *scraper.User, downloader Med
|
|||||||
// Download a User's tiny profile image, if it hasn't been downloaded yet.
|
// Download a User's tiny profile image, if it hasn't been downloaded yet.
|
||||||
// If it has been downloaded, do nothing.
|
// If it has been downloaded, do nothing.
|
||||||
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
|
// If this user should have a big profile picture, defer to the regular `DownloadUserContentFor` method.
|
||||||
func (p Profile) DownloadUserProfileImageTiny(u *scraper.User, api *scraper.API) error {
|
func (p Profile) DownloadUserProfileImageTiny(u *User, api *API) error {
|
||||||
if p.IsFollowing(*u) {
|
if p.IsFollowing(*u) {
|
||||||
return p.DownloadUserContentFor(u, api)
|
return p.DownloadUserContentFor(u, api)
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ import (
|
|||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Some types to spy on a MediaDownloader
|
// Some types to spy on a MediaDownloader
|
||||||
@ -38,7 +38,7 @@ func (d FakeDownloader) Contains(result SpyResult) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func test_all_downloaded(tweet scraper.Tweet, yes_or_no bool, t *testing.T) {
|
func test_all_downloaded(tweet Tweet, yes_or_no bool, t *testing.T) {
|
||||||
error_msg := map[bool]string{
|
error_msg := map[bool]string{
|
||||||
true: "Expected to be downloaded, but it wasn't",
|
true: "Expected to be downloaded, but it wasn't",
|
||||||
false: "Expected not to be downloaded, but it was",
|
false: "Expected not to be downloaded, but it was",
|
||||||
@ -147,7 +147,7 @@ func TestDownloadDefaultUserContent(t *testing.T) {
|
|||||||
// Check that the downloader was called with the appropriate stuff
|
// Check that the downloader was called with the appropriate stuff
|
||||||
assert.Len(*fake_downloader.Spy, 1)
|
assert.Len(*fake_downloader.Spy, 1)
|
||||||
assert.True(fake_downloader.Contains(SpyResult{
|
assert.True(fake_downloader.Contains(SpyResult{
|
||||||
scraper.DEFAULT_PROFILE_IMAGE_URL,
|
DEFAULT_PROFILE_IMAGE_URL,
|
||||||
"test_profiles/TestMediaQueries/profile_images/default_profile.png",
|
"test_profiles/TestMediaQueries/profile_images/default_profile.png",
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
@ -3,14 +3,14 @@ package persistence
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Save an Image
|
// Save an Image
|
||||||
//
|
//
|
||||||
// args:
|
// args:
|
||||||
// - img: the Image to save
|
// - img: the Image to save
|
||||||
func (p Profile) SaveImage(img scraper.Image) error {
|
func (p Profile) SaveImage(img Image) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
|
insert into images (id, tweet_id, width, height, remote_url, local_filename, is_downloaded)
|
||||||
values (:id, :tweet_id, :width, :height, :remote_url, :local_filename, :is_downloaded)
|
values (:id, :tweet_id, :width, :height, :remote_url, :local_filename, :is_downloaded)
|
||||||
@ -29,7 +29,7 @@ func (p Profile) SaveImage(img scraper.Image) error {
|
|||||||
//
|
//
|
||||||
// args:
|
// args:
|
||||||
// - img: the Video to save
|
// - img: the Video to save
|
||||||
func (p Profile) SaveVideo(vid scraper.Video) error {
|
func (p Profile) SaveVideo(vid Video) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename,
|
insert into videos (id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename,
|
||||||
duration, view_count, is_downloaded, is_blocked_by_dmca, is_gif)
|
duration, view_count, is_downloaded, is_blocked_by_dmca, is_gif)
|
||||||
@ -49,7 +49,7 @@ func (p Profile) SaveVideo(vid scraper.Video) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Save an Url
|
// Save an Url
|
||||||
func (p Profile) SaveUrl(url scraper.Url) error {
|
func (p Profile) SaveUrl(url Url) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
|
insert into urls (tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
|
||||||
thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
|
thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded)
|
||||||
@ -68,7 +68,7 @@ func (p Profile) SaveUrl(url scraper.Url) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Save a Poll
|
// Save a Poll
|
||||||
func (p Profile) SavePoll(poll scraper.Poll) error {
|
func (p Profile) SavePoll(poll Poll) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into polls (id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4,
|
insert into polls (id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4,
|
||||||
choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
|
choice4_votes, voting_duration, voting_ends_at, last_scraped_at)
|
||||||
@ -90,7 +90,7 @@ func (p Profile) SavePoll(poll scraper.Poll) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the list of images for a tweet
|
// Get the list of images for a tweet
|
||||||
func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err error) {
|
func (p Profile) GetImagesForTweet(t Tweet) (imgs []Image, err error) {
|
||||||
err = p.DB.Select(&imgs,
|
err = p.DB.Select(&imgs,
|
||||||
"select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?",
|
"select id, tweet_id, width, height, remote_url, local_filename, is_downloaded from images where tweet_id=?",
|
||||||
t.ID)
|
t.ID)
|
||||||
@ -98,7 +98,7 @@ func (p Profile) GetImagesForTweet(t scraper.Tweet) (imgs []scraper.Image, err e
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the list of videos for a tweet
|
// Get the list of videos for a tweet
|
||||||
func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err error) {
|
func (p Profile) GetVideosForTweet(t Tweet) (vids []Video, err error) {
|
||||||
err = p.DB.Select(&vids, `
|
err = p.DB.Select(&vids, `
|
||||||
select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration,
|
select id, tweet_id, width, height, remote_url, local_filename, thumbnail_remote_url, thumbnail_local_filename, duration,
|
||||||
view_count, is_downloaded, is_blocked_by_dmca, is_gif
|
view_count, is_downloaded, is_blocked_by_dmca, is_gif
|
||||||
@ -109,7 +109,7 @@ func (p Profile) GetVideosForTweet(t scraper.Tweet) (vids []scraper.Video, err e
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the list of Urls for a Tweet
|
// Get the list of Urls for a Tweet
|
||||||
func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error) {
|
func (p Profile) GetUrlsForTweet(t Tweet) (urls []Url, err error) {
|
||||||
err = p.DB.Select(&urls, `
|
err = p.DB.Select(&urls, `
|
||||||
select tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
|
select tweet_id, domain, text, short_text, title, description, creator_id, site_id, thumbnail_width, thumbnail_height,
|
||||||
thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded
|
thumbnail_remote_url, thumbnail_local_path, has_card, has_thumbnail, is_content_downloaded
|
||||||
@ -121,7 +121,7 @@ func (p Profile) GetUrlsForTweet(t scraper.Tweet) (urls []scraper.Url, err error
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the list of Polls for a Tweet
|
// Get the list of Polls for a Tweet
|
||||||
func (p Profile) GetPollsForTweet(t scraper.Tweet) (polls []scraper.Poll, err error) {
|
func (p Profile) GetPollsForTweet(t Tweet) (polls []Poll, err error) {
|
||||||
err = p.DB.Select(&polls, `
|
err = p.DB.Select(&polls, `
|
||||||
select id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes,
|
select id, tweet_id, num_choices, choice1, choice1_votes, choice2, choice2_votes, choice3, choice3_votes, choice4, choice4_votes,
|
||||||
voting_duration, voting_ends_at, last_scraped_at
|
voting_duration, voting_ends_at, last_scraped_at
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Create an Image, save it, reload it, and make sure it comes back the same
|
// Create an Image, save it, reload it, and make sure it comes back the same
|
||||||
@ -31,7 +31,7 @@ func TestSaveAndLoadImage(t *testing.T) {
|
|||||||
imgs, err := profile.GetImagesForTweet(tweet)
|
imgs, err := profile.GetImagesForTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
var new_img scraper.Image
|
var new_img Image
|
||||||
for index := range imgs {
|
for index := range imgs {
|
||||||
if imgs[index].ID == img.ID {
|
if imgs[index].ID == img.ID {
|
||||||
new_img = imgs[index]
|
new_img = imgs[index]
|
||||||
@ -52,7 +52,7 @@ func TestModifyImage(t *testing.T) {
|
|||||||
tweet := create_stable_tweet()
|
tweet := create_stable_tweet()
|
||||||
img := tweet.Images[0]
|
img := tweet.Images[0]
|
||||||
|
|
||||||
require.Equal(scraper.ImageID(-1), img.ID, "Got the wrong image back")
|
require.Equal(ImageID(-1), img.ID, "Got the wrong image back")
|
||||||
|
|
||||||
img.IsDownloaded = true
|
img.IsDownloaded = true
|
||||||
|
|
||||||
@ -94,7 +94,7 @@ func TestSaveAndLoadVideo(t *testing.T) {
|
|||||||
vids, err := profile.GetVideosForTweet(tweet)
|
vids, err := profile.GetVideosForTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
var new_vid scraper.Video
|
var new_vid Video
|
||||||
for index := range vids {
|
for index := range vids {
|
||||||
if vids[index].ID == vid.ID {
|
if vids[index].ID == vid.ID {
|
||||||
new_vid = vids[index]
|
new_vid = vids[index]
|
||||||
@ -115,7 +115,7 @@ func TestModifyVideo(t *testing.T) {
|
|||||||
|
|
||||||
tweet := create_stable_tweet()
|
tweet := create_stable_tweet()
|
||||||
vid := tweet.Videos[0]
|
vid := tweet.Videos[0]
|
||||||
require.Equal(scraper.VideoID(-1), vid.ID, "Got the wrong video back")
|
require.Equal(VideoID(-1), vid.ID, "Got the wrong video back")
|
||||||
|
|
||||||
vid.IsDownloaded = true
|
vid.IsDownloaded = true
|
||||||
vid.IsBlockedByDMCA = true
|
vid.IsBlockedByDMCA = true
|
||||||
@ -157,7 +157,7 @@ func TestSaveAndLoadUrl(t *testing.T) {
|
|||||||
urls, err := profile.GetUrlsForTweet(tweet)
|
urls, err := profile.GetUrlsForTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
var new_url scraper.Url
|
var new_url Url
|
||||||
for index := range urls {
|
for index := range urls {
|
||||||
if urls[index].Text == url.Text {
|
if urls[index].Text == url.Text {
|
||||||
new_url = urls[index]
|
new_url = urls[index]
|
||||||
@ -218,7 +218,7 @@ func TestSaveAndLoadPoll(t *testing.T) {
|
|||||||
polls, err := profile.GetPollsForTweet(tweet)
|
polls, err := profile.GetPollsForTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
|
|
||||||
var new_poll scraper.Poll
|
var new_poll Poll
|
||||||
for index := range polls {
|
for index := range polls {
|
||||||
if polls[index].ID == poll.ID {
|
if polls[index].ID == poll.ID {
|
||||||
new_poll = polls[index]
|
new_poll = polls[index]
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ import (
|
|||||||
sql "github.com/jmoiron/sqlx"
|
sql "github.com/jmoiron/sqlx"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:embed schema.sql
|
//go:embed schema.sql
|
||||||
@ -125,14 +125,14 @@ func LoadProfile(profile_dir string) (Profile, error) {
|
|||||||
return ret, err
|
return ret, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) ListSessions() []scraper.UserHandle {
|
func (p Profile) ListSessions() []UserHandle {
|
||||||
result, err := filepath.Glob(filepath.Join(p.ProfileDir, "*.session"))
|
result, err := filepath.Glob(filepath.Join(p.ProfileDir, "*.session"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
ret := []scraper.UserHandle{}
|
ret := []UserHandle{}
|
||||||
for _, filename := range result {
|
for _, filename := range result {
|
||||||
ret = append(ret, scraper.UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
|
ret = append(ret, UserHandle(filepath.Base(filename[:len(filename)-len(".session")])))
|
||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
@ -3,11 +3,11 @@ package persistence
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Save a Retweet. Do nothing if it already exists, because none of its parameters are modifiable.
|
// Save a Retweet. Do nothing if it already exists, because none of its parameters are modifiable.
|
||||||
func (p Profile) SaveRetweet(r scraper.Retweet) error {
|
func (p Profile) SaveRetweet(r Retweet) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into retweets (retweet_id, tweet_id, retweeted_by, retweeted_at)
|
insert into retweets (retweet_id, tweet_id, retweeted_by, retweeted_at)
|
||||||
values (:retweet_id, :tweet_id, :retweeted_by, :retweeted_at)
|
values (:retweet_id, :tweet_id, :retweeted_by, :retweeted_at)
|
||||||
@ -22,8 +22,8 @@ func (p Profile) SaveRetweet(r scraper.Retweet) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve a Retweet by ID
|
// Retrieve a Retweet by ID
|
||||||
func (p Profile) GetRetweetById(id scraper.TweetID) (scraper.Retweet, error) {
|
func (p Profile) GetRetweetById(id TweetID) (Retweet, error) {
|
||||||
var r scraper.Retweet
|
var r Retweet
|
||||||
err := p.DB.Get(&r, `
|
err := p.DB.Get(&r, `
|
||||||
select retweet_id, tweet_id, retweeted_by, retweeted_at
|
select retweet_id, tweet_id, retweeted_by, retweeted_at
|
||||||
from retweets
|
from retweets
|
||||||
|
@ -2,13 +2,15 @@ package persistence
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (p Profile) SaveSession(api scraper.API) {
|
func (p Profile) SaveSession(api API) {
|
||||||
data, err := json.Marshal(api)
|
data, err := json.Marshal(api)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
@ -21,13 +23,13 @@ func (p Profile) SaveSession(api scraper.API) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p Profile) LoadSession(userhandle scraper.UserHandle) scraper.API {
|
func (p Profile) LoadSession(userhandle UserHandle) API {
|
||||||
data, err := os.ReadFile(p.ProfileDir + "/" + string(userhandle+".session"))
|
data, err := os.ReadFile(p.ProfileDir + "/" + string(userhandle+".session"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var result scraper.API
|
var result API
|
||||||
err = json.Unmarshal(data, &result)
|
err = json.Unmarshal(data, &result)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -2,7 +2,7 @@ package persistence_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/cookiejar"
|
"net/http/cookiejar"
|
||||||
@ -10,6 +10,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
|
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Save and load an API session; it should come back the same
|
// Save and load an API session; it should come back the same
|
||||||
@ -22,7 +24,7 @@ func TestSaveAndLoadAuthenticatedSession(t *testing.T) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
api := scraper.API{
|
api := API{
|
||||||
UserHandle: "testUser",
|
UserHandle: "testUser",
|
||||||
IsAuthenticated: true,
|
IsAuthenticated: true,
|
||||||
Client: http.Client{
|
Client: http.Client{
|
||||||
|
@ -4,16 +4,17 @@ import (
|
|||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SpaceParticipant struct {
|
type SpaceParticipant struct {
|
||||||
UserID scraper.UserID `db:"user_id"`
|
UserID UserID `db:"user_id"`
|
||||||
SpaceID scraper.SpaceID `db:"space_id"`
|
SpaceID SpaceID `db:"space_id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save a Space
|
// Save a Space
|
||||||
func (p Profile) SaveSpace(s scraper.Space) error {
|
func (p Profile) SaveSpace(s Space) error {
|
||||||
_, err := p.DB.NamedExec(`
|
_, err := p.DB.NamedExec(`
|
||||||
insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
insert into spaces (id, created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
||||||
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
|
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched)
|
||||||
@ -52,7 +53,7 @@ func (p Profile) SaveSpace(s scraper.Space) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get a Space by ID
|
// Get a Space by ID
|
||||||
func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err error) {
|
func (p Profile) GetSpaceById(id SpaceID) (space Space, err error) {
|
||||||
err = p.DB.Get(&space,
|
err = p.DB.Get(&space,
|
||||||
`select id, ifnull(created_by_id, 0) created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
`select id, ifnull(created_by_id, 0) created_by_id, short_url, state, title, created_at, started_at, ended_at, updated_at,
|
||||||
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched
|
is_available_for_replay, replay_watch_count, live_listeners_count, is_details_fetched
|
||||||
@ -61,7 +62,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err erro
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
space.ParticipantIds = []scraper.UserID{}
|
space.ParticipantIds = []UserID{}
|
||||||
rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
|
rows, err := p.DB.Query(`select user_id from space_participants where space_id = ?`, id)
|
||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
err = nil
|
err = nil
|
||||||
@ -70,7 +71,7 @@ func (p Profile) GetSpaceById(id scraper.SpaceID) (space scraper.Space, err erro
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var participant_id scraper.UserID
|
var participant_id UserID
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
err = rows.Scan(&participant_id)
|
err = rows.Scan(&participant_id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -5,10 +5,11 @@ import (
|
|||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
|
||||||
|
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
|
||||||
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Create a Space, save it, reload it, and make sure it comes back the same
|
// Create a Space, save it, reload it, and make sure it comes back the same
|
||||||
@ -41,8 +42,8 @@ func TestModifySpace(t *testing.T) {
|
|||||||
|
|
||||||
// Modify and save
|
// Modify and save
|
||||||
space.State = "Some other state"
|
space.State = "Some other state"
|
||||||
space.UpdatedAt = scraper.TimestampFromUnix(9001)
|
space.UpdatedAt = TimestampFromUnix(9001)
|
||||||
space.EndedAt = scraper.TimestampFromUnix(10001)
|
space.EndedAt = TimestampFromUnix(10001)
|
||||||
space.ReplayWatchCount = 100
|
space.ReplayWatchCount = 100
|
||||||
space.LiveListenersCount = 50
|
space.LiveListenersCount = 50
|
||||||
space.IsDetailsFetched = true
|
space.IsDetailsFetched = true
|
||||||
@ -51,8 +52,8 @@ func TestModifySpace(t *testing.T) {
|
|||||||
|
|
||||||
new_space, err := profile.GetSpaceById(space.ID)
|
new_space, err := profile.GetSpaceById(space.ID)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
assert.Equal(scraper.TimestampFromUnix(9001), new_space.UpdatedAt)
|
assert.Equal(TimestampFromUnix(9001), new_space.UpdatedAt)
|
||||||
assert.Equal(scraper.TimestampFromUnix(10001), new_space.EndedAt)
|
assert.Equal(TimestampFromUnix(10001), new_space.EndedAt)
|
||||||
assert.Equal(100, new_space.ReplayWatchCount)
|
assert.Equal(100, new_space.ReplayWatchCount)
|
||||||
assert.Equal(50, new_space.LiveListenersCount)
|
assert.Equal(50, new_space.LiveListenersCount)
|
||||||
assert.True(new_space.IsDetailsFetched)
|
assert.True(new_space.IsDetailsFetched)
|
||||||
@ -68,9 +69,9 @@ func TestNoWorseningSpace(t *testing.T) {
|
|||||||
space.ShortUrl = "Some Short Url"
|
space.ShortUrl = "Some Short Url"
|
||||||
space.State = "Some State"
|
space.State = "Some State"
|
||||||
space.Title = "Debating Somebody"
|
space.Title = "Debating Somebody"
|
||||||
space.CreatedAt = scraper.TimestampFromUnix(1000)
|
space.CreatedAt = TimestampFromUnix(1000)
|
||||||
space.UpdatedAt = scraper.TimestampFromUnix(2000)
|
space.UpdatedAt = TimestampFromUnix(2000)
|
||||||
space.CreatedById = scraper.UserID(-1)
|
space.CreatedById = UserID(-1)
|
||||||
space.LiveListenersCount = 100
|
space.LiveListenersCount = 100
|
||||||
space.IsDetailsFetched = true
|
space.IsDetailsFetched = true
|
||||||
|
|
||||||
@ -82,9 +83,9 @@ func TestNoWorseningSpace(t *testing.T) {
|
|||||||
space.ShortUrl = ""
|
space.ShortUrl = ""
|
||||||
space.Title = ""
|
space.Title = ""
|
||||||
space.State = ""
|
space.State = ""
|
||||||
space.CreatedAt = scraper.TimestampFromUnix(0)
|
space.CreatedAt = TimestampFromUnix(0)
|
||||||
space.UpdatedAt = scraper.TimestampFromUnix(0)
|
space.UpdatedAt = TimestampFromUnix(0)
|
||||||
space.CreatedById = scraper.UserID(0)
|
space.CreatedById = UserID(0)
|
||||||
space.LiveListenersCount = 0
|
space.LiveListenersCount = 0
|
||||||
space.IsDetailsFetched = false
|
space.IsDetailsFetched = false
|
||||||
err = profile.SaveSpace(space)
|
err = profile.SaveSpace(space)
|
||||||
@ -97,9 +98,9 @@ func TestNoWorseningSpace(t *testing.T) {
|
|||||||
assert.Equal(new_space.ShortUrl, "Some Short Url")
|
assert.Equal(new_space.ShortUrl, "Some Short Url")
|
||||||
assert.Equal(new_space.State, "Some State")
|
assert.Equal(new_space.State, "Some State")
|
||||||
assert.Equal(new_space.Title, "Debating Somebody")
|
assert.Equal(new_space.Title, "Debating Somebody")
|
||||||
assert.Equal(new_space.CreatedAt, scraper.TimestampFromUnix(1000))
|
assert.Equal(new_space.CreatedAt, TimestampFromUnix(1000))
|
||||||
assert.Equal(new_space.UpdatedAt, scraper.TimestampFromUnix(2000))
|
assert.Equal(new_space.UpdatedAt, TimestampFromUnix(2000))
|
||||||
assert.Equal(new_space.CreatedById, scraper.UserID(-1))
|
assert.Equal(new_space.CreatedById, UserID(-1))
|
||||||
assert.Equal(new_space.LiveListenersCount, 100)
|
assert.Equal(new_space.LiveListenersCount, 100)
|
||||||
assert.True(new_space.IsDetailsFetched)
|
assert.True(new_space.IsDetailsFetched)
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@ import (
|
|||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
|
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Create a Tweet, save it, reload it, and make sure it comes back the same
|
// Create a Tweet, save it, reload it, and make sure it comes back the same
|
||||||
@ -71,7 +71,7 @@ func TestNoWorseningTweet(t *testing.T) {
|
|||||||
tweet.IsStub = false
|
tweet.IsStub = false
|
||||||
tweet.IsConversationScraped = true
|
tweet.IsConversationScraped = true
|
||||||
tweet.IsExpandable = true
|
tweet.IsExpandable = true
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
|
tweet.LastScrapedAt = TimestampFromUnix(1000)
|
||||||
tweet.Text = "Yes text"
|
tweet.Text = "Yes text"
|
||||||
tweet.NumLikes = 10
|
tweet.NumLikes = 10
|
||||||
tweet.NumRetweets = 11
|
tweet.NumRetweets = 11
|
||||||
@ -87,7 +87,7 @@ func TestNoWorseningTweet(t *testing.T) {
|
|||||||
tweet.IsStub = true
|
tweet.IsStub = true
|
||||||
tweet.IsConversationScraped = false
|
tweet.IsConversationScraped = false
|
||||||
tweet.IsExpandable = false
|
tweet.IsExpandable = false
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(500)
|
tweet.LastScrapedAt = TimestampFromUnix(500)
|
||||||
tweet.Text = ""
|
tweet.Text = ""
|
||||||
err = profile.SaveTweet(tweet)
|
err = profile.SaveTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
@ -231,7 +231,7 @@ func TestModifyTweet(t *testing.T) {
|
|||||||
tweet.IsStub = true
|
tweet.IsStub = true
|
||||||
tweet.IsContentDownloaded = false
|
tweet.IsContentDownloaded = false
|
||||||
tweet.IsConversationScraped = false
|
tweet.IsConversationScraped = false
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(1000)
|
tweet.LastScrapedAt = TimestampFromUnix(1000)
|
||||||
|
|
||||||
err := profile.SaveTweet(tweet)
|
err := profile.SaveTweet(tweet)
|
||||||
require.NoError(err)
|
require.NoError(err)
|
||||||
@ -243,7 +243,7 @@ func TestModifyTweet(t *testing.T) {
|
|||||||
tweet.IsStub = false
|
tweet.IsStub = false
|
||||||
tweet.IsContentDownloaded = true
|
tweet.IsContentDownloaded = true
|
||||||
tweet.IsConversationScraped = true
|
tweet.IsConversationScraped = true
|
||||||
tweet.LastScrapedAt = scraper.TimestampFromUnix(2000)
|
tweet.LastScrapedAt = TimestampFromUnix(2000)
|
||||||
tweet.TombstoneType = "deleted"
|
tweet.TombstoneType = "deleted"
|
||||||
|
|
||||||
err = profile.SaveTweet(tweet)
|
err = profile.SaveTweet(tweet)
|
||||||
@ -332,7 +332,7 @@ func TestLoadMissingTweet(t *testing.T) {
|
|||||||
profile_path := "test_profiles/TestTweetQueries"
|
profile_path := "test_profiles/TestTweetQueries"
|
||||||
profile := create_or_load_profile(profile_path)
|
profile := create_or_load_profile(profile_path)
|
||||||
|
|
||||||
_, err := profile.GetTweetById(scraper.TweetID(6234234)) // Random number
|
_, err := profile.GetTweetById(TweetID(6234234)) // Random number
|
||||||
require.Error(t, err)
|
require.Error(t, err)
|
||||||
assert.ErrorIs(t, err, persistence.ErrNotInDatabase)
|
assert.ErrorIs(t, err, persistence.ErrNotInDatabase)
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -18,7 +20,7 @@ func (p Profile) SaveTweetTrove(trove TweetTrove, should_download bool, api *API
|
|||||||
// who were marked as deleted, and then let the callee re-scrape and re-save them.
|
// who were marked as deleted, and then let the callee re-scrape and re-save them.
|
||||||
var conflict_err ErrConflictingUserHandle
|
var conflict_err ErrConflictingUserHandle
|
||||||
if errors.As(err, &conflict_err) {
|
if errors.As(err, &conflict_err) {
|
||||||
fmt.Printf(
|
log.Warnf(
|
||||||
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
|
"Conflicting user handle found (ID %d); old user has been marked deleted. Rescraping them\n",
|
||||||
conflict_err.ConflictingUserID,
|
conflict_err.ConflictingUserID,
|
||||||
)
|
)
|
||||||
|
@ -9,11 +9,11 @@ import (
|
|||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
"github.com/mattn/go-sqlite3"
|
"github.com/mattn/go-sqlite3"
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ErrConflictingUserHandle struct {
|
type ErrConflictingUserHandle struct {
|
||||||
ConflictingUserID scraper.UserID
|
ConflictingUserID UserID
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e ErrConflictingUserHandle) Error() string {
|
func (e ErrConflictingUserHandle) Error() string {
|
||||||
@ -46,7 +46,7 @@ const USERS_ALL_SQL_FIELDS = `
|
|||||||
// 3. Mark the old user as deactivated, eliminating the conflict
|
// 3. Mark the old user as deactivated, eliminating the conflict
|
||||||
// 4. Re-save the new user
|
// 4. Re-save the new user
|
||||||
// 5. Return an ErrConflictingUserHandle, notifying the caller of the conflict
|
// 5. Return an ErrConflictingUserHandle, notifying the caller of the conflict
|
||||||
func (p Profile) SaveUser(u *scraper.User) error {
|
func (p Profile) SaveUser(u *User) error {
|
||||||
// First, check if the user needs a fake ID, and generate one if needed
|
// First, check if the user needs a fake ID, and generate one if needed
|
||||||
if u.IsNeedingFakeID {
|
if u.IsNeedingFakeID {
|
||||||
// User is fake; check if we already have them, in order to proceed
|
// User is fake; check if we already have them, in order to proceed
|
||||||
@ -70,7 +70,7 @@ func (p Profile) SaveUser(u *scraper.User) error {
|
|||||||
// We know the UNIQUE violation must be on `handle`, because we checked for users with this ID
|
// We know the UNIQUE violation must be on `handle`, because we checked for users with this ID
|
||||||
// above (`update` query).
|
// above (`update` query).
|
||||||
handle_conflict := func() error {
|
handle_conflict := func() error {
|
||||||
var old_user scraper.User
|
var old_user User
|
||||||
err := p.DB.Get(&old_user,
|
err := p.DB.Get(&old_user,
|
||||||
`select id, is_id_fake from users where handle = ? and is_banned = 0 and is_deleted = 0`,
|
`select id, is_id_fake from users where handle = ? and is_banned = 0 and is_deleted = 0`,
|
||||||
u.Handle,
|
u.Handle,
|
||||||
@@ -189,10 +189,10 @@ func (p Profile) SaveUser(u *scraper.User) error {
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error) {
+func (p Profile) GetUserByHandle(handle UserHandle) (User, error) {
 	db := p.DB

-	var ret scraper.User
+	var ret User
 	err := db.Get(&ret, `
 		select `+USERS_ALL_SQL_FIELDS+`
 		from users_by_handle
@@ -212,10 +212,10 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error
 //
 // returns:
 // - the User, if it exists
-func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
+func (p Profile) GetUserByID(id UserID) (User, error) {
 	db := p.DB

-	var ret scraper.User
+	var ret User

 	err := db.Get(&ret, `
 		select `+USERS_ALL_SQL_FIELDS+`
@@ -243,7 +243,7 @@ func (p Profile) GetUserByID(id scraper.UserID) (scraper.User, error) {
 //
 // The `user` object will always have `is_content_downloaded` = false on every scrape. This is
 // why the No Worsening Principle is needed.
-func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
+func (p Profile) CheckUserContentDownloadNeeded(user User) bool {
 	row := p.DB.QueryRow(`select is_content_downloaded, profile_image_url, banner_image_url from users where id = ?`, user.ID)

 	var is_content_downloaded bool
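The "No Worsening Principle" referenced in that comment: a freshly scraped User always arrives with `is_content_downloaded` = false, so merging it naively into the database would erase the fact that content was already fetched. A minimal illustration of the merge rule (illustrative names, not the package's actual code):

    package main

    import "fmt"

    // mergeContentDownloaded applies the No Worsening rule: once the stored
    // flag is true, a scrape reporting false must not downgrade it.
    func mergeContentDownloaded(stored, scraped bool) bool {
        return stored || scraped
    }

    func main() {
        fmt.Println(mergeContentDownloaded(true, false))  // true: no worsening
        fmt.Println(mergeContentDownloaded(false, false)) // false: still needs download
    }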
@@ -271,7 +271,7 @@ func (p Profile) CheckUserContentDownloadNeeded(user scraper.User) bool {
 }

 // Follow / unfollow a user. Update the given User object's IsFollowed field.
-func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
+func (p Profile) SetUserFollowed(user *User, is_followed bool) {
 	result, err := p.DB.Exec("update users set is_followed = ? where id = ?", is_followed, user.ID)
 	if err != nil {
 		panic(fmt.Errorf("Error inserting user with handle %q:\n %w", user.Handle, err))
@@ -286,12 +286,12 @@ func (p Profile) SetUserFollowed(user *scraper.User, is_followed bool) {
 	user.IsFollowed = is_followed
 }

-func (p Profile) NextFakeUserID() scraper.UserID {
+func (p Profile) NextFakeUserID() UserID {
 	_, err := p.DB.Exec("update fake_user_sequence set latest_fake_id = latest_fake_id + 1")
 	if err != nil {
 		panic(err)
 	}
-	var ret scraper.UserID
+	var ret UserID
 	err = p.DB.QueryRow("select latest_fake_id from fake_user_sequence").Scan(&ret)
 	if err != nil {
 		panic(err)
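NextFakeUserID is a single-row counter table, incremented and then read back. A self-contained sketch of the same pattern against an in-memory SQLite database (the driver choice and the seed value here are assumptions for illustration, not taken from the repo):

    package main

    import (
        "database/sql"
        "fmt"
        "log"

        _ "github.com/mattn/go-sqlite3" // assumed SQLite driver for this sketch
    )

    func main() {
        db, err := sql.Open("sqlite3", ":memory:")
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        // Single-row counter table, like fake_user_sequence above.
        if _, err := db.Exec(`create table fake_user_sequence (latest_fake_id integer not null)`); err != nil {
            log.Fatal(err)
        }
        if _, err := db.Exec(`insert into fake_user_sequence values (1000)`); err != nil { // arbitrary seed
            log.Fatal(err)
        }

        // Increment, then read back -- the same two-statement shape as NextFakeUserID.
        if _, err := db.Exec(`update fake_user_sequence set latest_fake_id = latest_fake_id + 1`); err != nil {
            log.Fatal(err)
        }
        var id int64
        if err := db.QueryRow(`select latest_fake_id from fake_user_sequence`).Scan(&id); err != nil {
            log.Fatal(err)
        }
        fmt.Println(id) // 1001
    }

Note that the two statements are not atomic on their own; concurrent callers would need a transaction or serialized access to guarantee unique IDs.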
@@ -301,7 +301,7 @@ func (p Profile) NextFakeUserID() scraper.UserID {

 // TODO: This is only used in checking whether the media downloader should get the big or small version of
 // a profile image. That should be rewritten
-func (p Profile) IsFollowing(user scraper.User) bool {
+func (p Profile) IsFollowing(user User) bool {
 	row := p.DB.QueryRow("select is_followed from users where id like ?", user.ID)
 	var ret bool
 	err := row.Scan(&ret)
@@ -315,21 +315,21 @@ func (p Profile) IsFollowing(user scraper.User) bool {
 }

 // Utility function to compute the path to save banner image to
-func (p Profile) get_banner_image_output_path(u scraper.User) string {
+func (p Profile) get_banner_image_output_path(u User) string {
 	return path.Join(p.ProfileDir, "profile_images", u.BannerImageLocalPath)
 }

 // Utility function to compute the path to save profile image to
-func (p Profile) get_profile_image_output_path(u scraper.User) string {
+func (p Profile) get_profile_image_output_path(u User) string {
 	if u.ProfileImageUrl == "" {
-		return path.Join(p.ProfileDir, "profile_images", path.Base(scraper.DEFAULT_PROFILE_IMAGE_URL))
+		return path.Join(p.ProfileDir, "profile_images", path.Base(DEFAULT_PROFILE_IMAGE_URL))
 	}
 	return path.Join(p.ProfileDir, "profile_images", u.ProfileImageLocalPath)
 }

 // Do a text search for users
-func (p Profile) SearchUsers(s string) []scraper.User {
-	var ret []scraper.User
+func (p Profile) SearchUsers(s string) []User {
+	var ret []User
 	q, args, err := sqlx.Named(`
 		select `+USERS_ALL_SQL_FIELDS+`
 		from users
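SearchUsers builds its query with sqlx.Named, which expands `:name` placeholders into positional bindvars plus a matching argument slice. A standalone sketch (the query text is illustrative, not the repo's actual statement):

    package main

    import (
        "fmt"
        "log"

        "github.com/jmoiron/sqlx"
    )

    func main() {
        q, args, err := sqlx.Named(
            `select handle from users where handle like :s or bio like :s`,
            map[string]interface{}{"s": "%no%"},
        )
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(q)    // select handle from users where handle like ? or bio like ?
        fmt.Println(args) // [%no% %no%]
    }

Each occurrence of a placeholder produces one entry in the args slice, so the query can then be passed straight to the driver.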

@@ -12,7 +12,7 @@ import (
 	"github.com/stretchr/testify/require"

 	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+	. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 // Create a user, save it, reload it, and make sure it comes back the same
@@ -53,19 +53,19 @@ func TestModifyUser(t *testing.T) {
 	user := create_dummy_user()
 	user.DisplayName = "Display Name 1"
 	user.Location = "location1"
-	user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+	user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
 	user.IsPrivate = false
 	user.IsVerified = false
 	user.FollowersCount = 1000
-	user.JoinDate = scraper.TimestampFromUnix(1000)
+	user.JoinDate = TimestampFromUnix(1000)
 	user.ProfileImageUrl = "asdf"
 	user.IsContentDownloaded = true

-	// Save the user so it can be modified
+	// Save the user for the first time; should do insert
 	err := profile.SaveUser(&user)
 	require.NoError(err)

-	new_handle := scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+	new_handle := UserHandle(fmt.Sprintf("handle %d", rand.Int31()))

 	user.DisplayName = "Display Name 2"
 	user.Location = "location2"
@@ -73,11 +73,11 @@ func TestModifyUser(t *testing.T) {
 	user.IsPrivate = true
 	user.IsVerified = true
 	user.FollowersCount = 2000
-	user.JoinDate = scraper.TimestampFromUnix(2000)
+	user.JoinDate = TimestampFromUnix(2000)
 	user.ProfileImageUrl = "asdf2"
 	user.IsContentDownloaded = false // test No Worsening

-	// Save the modified user
+	// Save the user for the second time; should do update
 	err = profile.SaveUser(&user)
 	require.NoError(err)

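The reworded comments above ("should do insert" / "should do update") describe SaveUser's insert-or-update behavior; the implementation itself is not shown in this diff. In SQLite that behavior is commonly achieved with an upsert, so here is a generic, hedged sketch of the idea (driver and statement are assumptions, not the repo's actual code):

    package main

    import (
        "database/sql"
        "fmt"
        "log"

        _ "github.com/mattn/go-sqlite3" // assumed driver for this sketch
    )

    func main() {
        db, err := sql.Open("sqlite3", ":memory:")
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        if _, err := db.Exec(`create table users (id integer primary key, display_name text)`); err != nil {
            log.Fatal(err)
        }

        upsert := `insert into users (id, display_name) values (?, ?)
                   on conflict (id) do update set display_name = excluded.display_name`

        // First save: inserts the row. Second save with the same id: updates it.
        if _, err := db.Exec(upsert, 1, "Display Name 1"); err != nil {
            log.Fatal(err)
        }
        if _, err := db.Exec(upsert, 1, "Display Name 2"); err != nil {
            log.Fatal(err)
        }

        var name string
        if err := db.QueryRow(`select display_name from users where id = 1`).Scan(&name); err != nil {
            log.Fatal(err)
        }
        fmt.Println(name) // Display Name 2
    }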
@@ -107,9 +107,9 @@ func TestSetUserBannedDeleted(t *testing.T) {
 	user.DisplayName = "Display Name 1"
 	user.Location = "location1"
 	user.Bio = "Some Bio"
-	user.Handle = scraper.UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
+	user.Handle = UserHandle(fmt.Sprintf("handle %d", rand.Int31()))
 	user.FollowersCount = 1000
-	user.JoinDate = scraper.TimestampFromUnix(1000)
+	user.JoinDate = TimestampFromUnix(1000)
 	user.ProfileImageUrl = "asdf"
 	user.IsContentDownloaded = true

@@ -118,7 +118,7 @@ func TestSetUserBannedDeleted(t *testing.T) {
 	require.NoError(err)

 	// Now the user deactivates
-	err = profile.SaveUser(&scraper.User{ID: user.ID, IsDeleted: true})
+	err = profile.SaveUser(&User{ID: user.ID, IsDeleted: true})
 	require.NoError(err)
 	// Reload the modified user
 	new_user, err := profile.GetUserByID(user.ID)
@@ -141,9 +141,9 @@ func TestSaveAndLoadBannedDeletedUser(t *testing.T) {
 	profile_path := "test_profiles/TestUserQueries"
 	profile := create_or_load_profile(profile_path)

-	user := scraper.User{
-		ID:       scraper.UserID(rand.Int31()),
-		Handle:   scraper.UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
+	user := User{
+		ID:       UserID(rand.Int31()),
+		Handle:   UserHandle(fmt.Sprintf("handle-%d", rand.Int31())),
 		IsBanned: true,
 	}

@@ -365,20 +365,20 @@ func TestCreateUnknownUserWithHandle(t *testing.T) {

 	next_id := profile.NextFakeUserID()

-	handle := scraper.UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
-	user := scraper.GetUnknownUserWithHandle(handle)
-	assert.Equal(scraper.UserID(0), user.ID)
+	handle := UserHandle(fmt.Sprintf("UnknownUser%d", rand.Int31()))
+	user := GetUnknownUserWithHandle(handle)
+	assert.Equal(UserID(0), user.ID)
 	assert.True(user.IsIdFake)

 	err := profile.SaveUser(&user)
 	assert.NoError(err)
-	assert.Equal(scraper.UserID(next_id+1), user.ID)
+	assert.Equal(UserID(next_id+1), user.ID)

 	// Ensure the change was persisted
 	user_reloaded, err := profile.GetUserByHandle(user.Handle)
 	require.NoError(t, err)
 	assert.Equal(handle, user_reloaded.Handle) // Verify it's the same user
-	assert.Equal(scraper.UserID(next_id+1), user_reloaded.ID)
+	assert.Equal(UserID(next_id+1), user_reloaded.ID)

 	// Why not tack this test on here: make sure NextFakeUserID works as expected
 	assert.Equal(next_id+2, profile.NextFakeUserID())
@@ -393,8 +393,8 @@ func TestCreateUnknownUserWithHandleThatAlreadyExists(t *testing.T) {

 	user := create_stable_user()

-	unknown_user := scraper.GetUnknownUserWithHandle(user.Handle)
-	assert.Equal(scraper.UserID(0), unknown_user.ID)
+	unknown_user := GetUnknownUserWithHandle(user.Handle)
+	assert.Equal(UserID(0), unknown_user.ID)

 	err := profile.SaveUser(&unknown_user)
 	assert.NoError(err)
@@ -417,6 +417,6 @@ func TestSearchUsers(t *testing.T) {

 	users := profile.SearchUsers("no")
 	assert.Len(users, 2)
-	assert.Equal(users[0].Handle, scraper.UserHandle("Cernovich"))
-	assert.Equal(users[1].Handle, scraper.UserHandle("CovfefeAnon"))
+	assert.Equal(users[0].Handle, UserHandle("Cernovich"))
+	assert.Equal(users[1].Handle, UserHandle("CovfefeAnon"))
 }

@@ -8,7 +8,7 @@ import (
 	"github.com/stretchr/testify/require"

 	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/persistence"
-	"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+	. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
 )

 func TestVersionUpgrade(t *testing.T) {
@@ -21,7 +21,7 @@ func TestVersionUpgrade(t *testing.T) {
 	profile := create_or_load_profile(profile_path)

 	test_migration := "insert into tweets (id, user_id, text) values (21250554358298342, -1, 'awefjk')"
-	test_tweet_id := scraper.TweetID(21250554358298342)
+	test_tweet_id := TweetID(21250554358298342)

 	require.False(profile.IsTweetInDatabase(test_tweet_id), "Test tweet shouldn't be in db yet")
