Add persistence module
This commit is contained in:
parent
12b1aeaccd
commit
545346e635
24
.build.yml
24
.build.yml
@ -23,10 +23,16 @@ tasks:
|
||||
duration=$SECONDS
|
||||
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
||||
- run_tests: |
|
||||
- test_scraper: |
|
||||
cd twitter_offline_engine/scraper
|
||||
|
||||
go test -bench=.
|
||||
go test -bench=. -cover
|
||||
|
||||
- test_persistence: |
    cd twitter_offline_engine/persistence

    # Scratch directory that the persistence tests create their profiles in.
    # `-p` keeps this step from failing when the directory already exists.
    mkdir -p test_profiles/
    go test -bench=. -cover
|
||||
|
||||
- install_golangci-lint: |
|
||||
SECONDS=0
|
||||
@ -37,7 +43,7 @@ tasks:
|
||||
duration=$SECONDS
|
||||
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
||||
- run_lint: |
|
||||
- lint_scraper: |
|
||||
SECONDS=0
|
||||
|
||||
cd twitter_offline_engine/scraper
|
||||
@ -48,3 +54,15 @@ tasks:
|
||||
|
||||
duration=$SECONDS
|
||||
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
||||
- lint_persistence: |
    # Reset the shell's SECONDS counter so the task duration can be reported at the end
    SECONDS=0

    # Lint the persistence package
    cd twitter_offline_engine/persistence
    golangci-lint run

    # Lint the sibling `cmd` directory
    cd ../cmd
    golangci-lint run

    # Report how long the task took, as minutes and seconds
    duration=$SECONDS
    echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
|
||||
|
1
persistence/.gitignore
vendored
Normal file
1
persistence/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
test_profiles
|
143
persistence/profile.go
Normal file
143
persistence/profile.go
Normal file
@ -0,0 +1,143 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"database/sql"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
//go:embed schema.sql
|
||||
var sql_init string
|
||||
|
||||
// Settings holds the per-profile configuration that is serialized to and from `settings.yaml`.
// It currently has no fields.
type Settings struct {}
|
||||
|
||||
type Profile struct {
|
||||
ProfileDir string
|
||||
UsersList []scraper.UserHandle
|
||||
Settings Settings
|
||||
DB *sql.DB
|
||||
}
|
||||
|
||||
|
||||
// Create a new profile in the given location.
|
||||
// `path` is a directory
|
||||
func NewProfile(target_dir string) (Profile, error) {
|
||||
user_list_file := path.Join(target_dir, "users.txt")
|
||||
settings_file := path.Join(target_dir, "settings.yaml")
|
||||
sqlite_file := path.Join(target_dir, "twitter.db")
|
||||
profile_images_dir := path.Join(target_dir, "profile_images")
|
||||
images_dir := path.Join(target_dir, "images")
|
||||
videos_dir := path.Join(target_dir, "videos")
|
||||
|
||||
|
||||
for _, file := range []string{
|
||||
user_list_file,
|
||||
settings_file,
|
||||
sqlite_file,
|
||||
profile_images_dir,
|
||||
images_dir,
|
||||
videos_dir,
|
||||
} {
|
||||
if file_exists(file) {
|
||||
return Profile{}, fmt.Errorf("File already exists: %s", file)
|
||||
}
|
||||
}
|
||||
|
||||
// Create `twitter.db`
|
||||
fmt.Printf("Creating %s\n", sqlite_file)
|
||||
db, err := sql.Open("sqlite3", sqlite_file)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
_, err = db.Exec(sql_init)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `users.txt`
|
||||
fmt.Printf("Creating %s\n", user_list_file)
|
||||
err = os.WriteFile(user_list_file, []byte{}, os.FileMode(0644))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `settings.yaml`
|
||||
fmt.Printf("Creating %s\n", settings_file)
|
||||
settings := Settings{}
|
||||
data, err := yaml.Marshal(&settings)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
err = os.WriteFile(settings_file, data, os.FileMode(0644))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `profile_images`
|
||||
fmt.Printf("Creating %s/\n", profile_images_dir)
|
||||
err = os.Mkdir(profile_images_dir, os.FileMode(0755))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `images`
|
||||
fmt.Printf("Creating %s/\n", images_dir)
|
||||
err = os.Mkdir(images_dir, os.FileMode(0755))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
// Create `videos`
|
||||
fmt.Printf("Creating %s/\n", videos_dir)
|
||||
err = os.Mkdir(videos_dir, os.FileMode(0755))
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
return Profile{target_dir, []scraper.UserHandle{}, settings, db}, nil
|
||||
}
|
||||
|
||||
|
||||
func LoadProfile(profile_dir string) (Profile, error) {
|
||||
user_list_file := path.Join(profile_dir, "users.txt")
|
||||
settings_file := path.Join(profile_dir, "settings.yaml")
|
||||
sqlite_file := path.Join(profile_dir, "twitter.db")
|
||||
|
||||
for _, file := range []string{
|
||||
user_list_file,
|
||||
settings_file,
|
||||
sqlite_file,
|
||||
} {
|
||||
if !file_exists(file) {
|
||||
return Profile{}, fmt.Errorf("Invalid profile, could not find file: %s", file)
|
||||
}
|
||||
}
|
||||
|
||||
users_data, err := os.ReadFile(user_list_file)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
users_list := parse_users_file(users_data)
|
||||
|
||||
settings_data, err := os.ReadFile(settings_file)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
settings := Settings{}
|
||||
err = yaml.Unmarshal(settings_data, &settings)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
db, err := sql.Open("sqlite3", sqlite_file)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
return Profile{profile_dir, users_list, settings, db}, nil
|
||||
}
|
130
persistence/profile_test.go
Normal file
130
persistence/profile_test.go
Normal file
@ -0,0 +1,130 @@
|
||||
package persistence_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"os"
|
||||
"path"
|
||||
"errors"
|
||||
|
||||
"offline_twitter/persistence"
|
||||
)
|
||||
|
||||
// DUPE 1
//
// file_exists reports whether anything (file or directory) exists at `path`.
// The `persistence` package has an unexported helper with the same name and body; this copy
// exists for the external test package.
//
// Any `os.Stat` error other than "does not exist" causes a panic.
func file_exists(path string) bool {
	_, err := os.Stat(path)
	switch {
	case err == nil:
		return true
	case errors.Is(err, os.ErrNotExist):
		return false
	default:
		panic(err)
	}
}
|
||||
|
||||
// isdir_map turns an "is a directory" flag into a human-readable label
// ("directory" or "file"), for use in test failure messages.
func isdir_map(is_dir bool) string {
	label := "file"
	if is_dir {
		label = "directory"
	}
	return label
}
|
||||
|
||||
|
||||
func TestNewProfile(t *testing.T) {
|
||||
profile_path := "test_profiles/TestNewProfile"
|
||||
if !file_exists(profile_path) {
|
||||
err := os.Mkdir(profile_path, 0755)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
contents, err := os.ReadDir(profile_path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if len(contents) != 0 {
|
||||
t.Fatalf("test_profile not empty at start of test!")
|
||||
}
|
||||
|
||||
profile, err := persistence.NewProfile(profile_path)
|
||||
if err != nil {
|
||||
t.Fatalf(err.Error())
|
||||
}
|
||||
|
||||
if profile.ProfileDir != profile_path {
|
||||
t.Errorf("ProfileDir should be %s, but it is %s", profile_path, profile.ProfileDir)
|
||||
}
|
||||
if len(profile.UsersList) != 0 {
|
||||
t.Errorf("Expected empty users list, got %v instead", profile.UsersList)
|
||||
}
|
||||
|
||||
// Check files were created
|
||||
contents, err = os.ReadDir(profile_path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if len(contents) != 6 {
|
||||
t.Fatalf("Expected 6 contents, got %d instead", len(contents))
|
||||
}
|
||||
|
||||
expected_files := []struct {
|
||||
filename string
|
||||
isDir bool
|
||||
} {
|
||||
{"images", true},
|
||||
{"profile_images", true},
|
||||
{"settings.yaml", false},
|
||||
{"twitter.db", false},
|
||||
{"users.txt", false},
|
||||
{"videos", true},
|
||||
}
|
||||
|
||||
for i, v := range expected_files {
|
||||
if contents[i].Name() != v.filename || contents[i].IsDir() != v.isDir {
|
||||
t.Fatalf("Expected `%s` to be a %s, but got %s [%s]", v.filename, isdir_map(v.isDir), contents[i].Name(), isdir_map(contents[i].IsDir()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadProfile(t *testing.T) {
|
||||
profile_path := "test_profiles/TestLoadProfile"
|
||||
if !file_exists(profile_path) {
|
||||
err := os.Mkdir(profile_path, 0755)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
contents, err := os.ReadDir(profile_path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if len(contents) != 0 {
|
||||
t.Fatalf("test_profile not empty at start of test!")
|
||||
}
|
||||
|
||||
_, err = persistence.NewProfile(profile_path)
|
||||
if err != nil {
|
||||
t.Fatalf(err.Error())
|
||||
}
|
||||
|
||||
// Create some users
|
||||
err = os.WriteFile(path.Join(profile_path, "users.txt"), []byte("user1\nuser2\n"), 0644)
|
||||
if err != nil {
|
||||
t.Fatalf(err.Error())
|
||||
}
|
||||
|
||||
profile, err := persistence.LoadProfile(profile_path)
|
||||
if err != nil {
|
||||
t.Fatalf(err.Error())
|
||||
}
|
||||
|
||||
if profile.ProfileDir != profile_path {
|
||||
t.Errorf("Expected profile path to be %q, but got %q", profile_path, profile.ProfileDir)
|
||||
}
|
||||
|
||||
if len(profile.UsersList) != 2 {
|
||||
t.Errorf("Expected 2 users, got %v", profile.UsersList)
|
||||
}
|
||||
|
||||
}
|
71
persistence/schema.sql
Normal file
71
persistence/schema.sql
Normal file
@ -0,0 +1,71 @@
|
||||
-- Turn on foreign key enforcement. In SQLite this is a per-connection setting, so it only
-- affects the connection that executes this script.
PRAGMA foreign_keys = on;
|
||||
|
||||
-- User accounts. `id` and `handle` must each be unique.
create table users (
    rowid integer primary key,
    id integer unique not null,
    display_name text not null,
    handle text unique not null,
    bio text,
    following_count integer not null,
    followers_count integer not null,
    location text,
    website text,
    join_date integer,
    is_private boolean default 0,
    is_verified boolean default 0,
    profile_image_url text,
    banner_image_url text,
    pinned_tweet integer  -- no foreign key constraint is declared for this column
);
|
||||
|
||||
-- Tweets. Each tweet references its author in `users`, and optionally the tweets it replies
-- to and quotes.
create table tweets (
    rowid integer primary key,
    id integer unique not null,
    user integer not null,
    text text not null,
    posted_at integer,
    num_likes integer,
    num_retweets integer,
    num_replies integer,
    num_quote_tweets integer,
    has_video boolean,
    in_reply_to integer,
    quoted_tweet integer,
    mentions text,  -- comma-separated
    hashtags text,  -- comma-separated

    foreign key(user) references users(id),
    foreign key(in_reply_to) references tweets(id),
    foreign key(quoted_tweet) references tweets(id)
);
|
||||
|
||||
-- Retweets: which user retweeted which tweet, and when.
create table retweets (
    rowid integer primary key,
    retweet_id integer not null,
    tweet_id integer not null,
    retweeted_by integer not null,
    retweeted_at integer not null,

    -- Table constraints are comma-separated; SQLite tolerates a missing comma here, but
    -- standard SQL does not.
    foreign key(tweet_id) references tweets(id),
    foreign key(retweeted_by) references users(id)
);
|
||||
|
||||
-- URLs attached to a tweet. A given URL text is stored at most once per tweet.
create table urls (
    rowid integer primary key,
    tweet_id integer not null,
    text text not null,

    -- Table constraints are comma-separated; SQLite tolerates a missing comma here, but
    -- standard SQL does not.
    unique (tweet_id, text),
    foreign key(tweet_id) references tweets(id)
);
|
||||
|
||||
-- Image files attached to a tweet. A given filename is stored at most once per tweet.
create table images (
    rowid integer primary key,
    tweet_id integer not null,
    filename text not null,

    -- Table constraints are comma-separated; SQLite tolerates a missing comma here, but
    -- standard SQL does not.
    unique (tweet_id, filename),
    foreign key(tweet_id) references tweets(id)
);
|
||||
|
||||
-- Hashtags used in a tweet. A given hashtag text is stored at most once per tweet.
create table hashtags (
    rowid integer primary key,
    tweet_id integer not null,
    text text not null,

    -- Table constraints are comma-separated; SQLite tolerates a missing comma here, but
    -- standard SQL does not.
    unique (tweet_id, text),
    foreign key(tweet_id) references tweets(id)
);
|
34
persistence/utils.go
Normal file
34
persistence/utils.go
Normal file
@ -0,0 +1,34 @@
|
||||
package persistence
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"offline_twitter/scraper"
|
||||
)
|
||||
|
||||
|
||||
// DUPE 1
//
// file_exists reports whether anything (file or directory) exists at `path`.
// The package's external test file carries a helper with the same name and body.
//
// Any `os.Stat` error other than "does not exist" causes a panic.
func file_exists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return false
		}
		panic(err)
	}
	return true
}
|
||||
|
||||
|
||||
func parse_users_file(data []byte) []scraper.UserHandle {
|
||||
users := strings.Split(string(data), "\n")
|
||||
ret := []scraper.UserHandle{}
|
||||
for _, u := range users {
|
||||
if u != "" {
|
||||
ret = append(ret, scraper.UserHandle(u))
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user