Add persistence module

This commit is contained in:
Alessio 2021-06-27 13:31:30 -07:00
parent 12b1aeaccd
commit 545346e635
6 changed files with 400 additions and 3 deletions

View File

@ -23,10 +23,16 @@ tasks:
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
- run_tests: |
- test_scraper: |
cd twitter_offline_engine/scraper
go test -bench=.
go test -bench=. -cover
- test_persistence: |
cd twitter_offline_engine/persistence
mkdir test_profiles/
go test -bench=. -cover
- install_golangci-lint: |
SECONDS=0
@ -37,7 +43,7 @@ tasks:
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
- run_lint: |
- lint_scraper: |
SECONDS=0
cd twitter_offline_engine/scraper
@ -48,3 +54,15 @@ tasks:
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
- lint_persistence: |
SECONDS=0
cd twitter_offline_engine/persistence
golangci-lint run
cd ../cmd
golangci-lint run
duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."

1
persistence/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
test_profiles

143
persistence/profile.go Normal file
View File

@ -0,0 +1,143 @@
package persistence
import (
_ "embed"
"fmt"
"os"
"path"
"database/sql"
_ "github.com/mattn/go-sqlite3"
"gopkg.in/yaml.v2"
"offline_twitter/scraper"
)
// sql_init holds the text of schema.sql, embedded into the binary at build
// time by the go:embed directive below. NewProfile executes it against a
// freshly created database.
//go:embed schema.sql
var sql_init string
// Settings is the profile configuration that NewProfile writes to, and
// LoadProfile reads from, settings.yaml. It has no fields yet.
type Settings struct {}
// Profile describes a profile directory on disk together with the data that
// was created in it (NewProfile) or read from it (LoadProfile).
//
// NOTE: NewProfile and LoadProfile construct this struct with unkeyed
// literals, so the order of the fields below must not change.
type Profile struct {
// Directory the profile lives in, exactly as passed to NewProfile / LoadProfile.
ProfileDir string
// Handles parsed from users.txt; empty for a newly created profile.
UsersList []scraper.UserHandle
// Contents of settings.yaml.
Settings Settings
// Handle for the profile's twitter.db SQLite database.
DB *sql.DB
}
// NewProfile creates a new, empty profile inside `target_dir`.
//
// `target_dir` is a directory; nothing in this function creates it, so it is
// expected to exist already. The following entries are created inside it:
//
//   - twitter.db      SQLite database initialised from the embedded schema
//   - users.txt       empty list of user handles
//   - settings.yaml   serialised default Settings
//   - profile_images/, images/, videos/   empty directories
//
// If any of those entries already exists, nothing is created and an error is
// returned. If a later step fails, the database handle is closed before
// returning; files that were already written are left in place.
//
// On success the returned Profile owns an open *sql.DB that the caller is
// responsible for closing.
func NewProfile(target_dir string) (Profile, error) {
	user_list_file := path.Join(target_dir, "users.txt")
	settings_file := path.Join(target_dir, "settings.yaml")
	sqlite_file := path.Join(target_dir, "twitter.db")
	profile_images_dir := path.Join(target_dir, "profile_images")
	images_dir := path.Join(target_dir, "images")
	videos_dir := path.Join(target_dir, "videos")

	// Refuse to touch a directory that already holds any profile component.
	for _, file := range []string{
		user_list_file,
		settings_file,
		sqlite_file,
		profile_images_dir,
		images_dir,
		videos_dir,
	} {
		if file_exists(file) {
			return Profile{}, fmt.Errorf("File already exists: %s", file)
		}
	}

	// Create `twitter.db`
	//
	// database/sql pools connections, and SQLite's foreign key enforcement is a
	// per-connection setting. The `PRAGMA foreign_keys` in the schema script
	// only reaches the one connection that runs it, so enforcement is also
	// requested through the DSN, which the driver applies to every connection.
	fmt.Printf("Creating %s\n", sqlite_file)
	db, err := sql.Open("sqlite3", sqlite_file+"?_foreign_keys=on")
	if err != nil {
		return Profile{}, err
	}
	// Do not leak the database handle if any of the remaining steps fails.
	succeeded := false
	defer func() {
		if !succeeded {
			// The original failure is what gets reported; a close error here
			// would only obscure it.
			_ = db.Close()
		}
	}()
	if _, err = db.Exec(sql_init); err != nil {
		return Profile{}, err
	}

	// Create `users.txt`
	fmt.Printf("Creating %s\n", user_list_file)
	if err = os.WriteFile(user_list_file, []byte{}, os.FileMode(0644)); err != nil {
		return Profile{}, err
	}

	// Create `settings.yaml`
	fmt.Printf("Creating %s\n", settings_file)
	settings := Settings{}
	data, err := yaml.Marshal(&settings)
	if err != nil {
		return Profile{}, err
	}
	if err = os.WriteFile(settings_file, data, os.FileMode(0644)); err != nil {
		return Profile{}, err
	}

	// Create `profile_images`, `images` and `videos`
	for _, dir := range []string{profile_images_dir, images_dir, videos_dir} {
		fmt.Printf("Creating %s/\n", dir)
		if err = os.Mkdir(dir, os.FileMode(0755)); err != nil {
			return Profile{}, err
		}
	}

	succeeded = true
	return Profile{target_dir, []scraper.UserHandle{}, settings, db}, nil
}
// LoadProfile opens the existing profile stored in `profile_dir`.
//
// The directory must contain users.txt, settings.yaml and twitter.db; if any
// of them is missing an "Invalid profile" error is returned. users.txt is
// parsed into the list of user handles, settings.yaml is unmarshalled into
// Settings, and a handle for twitter.db is opened.
//
// On success the returned Profile owns an open *sql.DB that the caller is
// responsible for closing.
func LoadProfile(profile_dir string) (Profile, error) {
	user_list_file := path.Join(profile_dir, "users.txt")
	settings_file := path.Join(profile_dir, "settings.yaml")
	sqlite_file := path.Join(profile_dir, "twitter.db")

	for _, file := range []string{
		user_list_file,
		settings_file,
		sqlite_file,
	} {
		if !file_exists(file) {
			return Profile{}, fmt.Errorf("Invalid profile, could not find file: %s", file)
		}
	}

	users_data, err := os.ReadFile(user_list_file)
	if err != nil {
		return Profile{}, err
	}
	users_list := parse_users_file(users_data)

	settings_data, err := os.ReadFile(settings_file)
	if err != nil {
		return Profile{}, err
	}
	settings := Settings{}
	if err = yaml.Unmarshal(settings_data, &settings); err != nil {
		return Profile{}, err
	}

	// The schema script (and its `PRAGMA foreign_keys`) is only run when a
	// profile is created, and foreign key enforcement is a per-connection
	// setting in SQLite. Request it through the DSN so that every pooled
	// connection to a loaded profile enforces the declared foreign keys.
	db, err := sql.Open("sqlite3", sqlite_file+"?_foreign_keys=on")
	if err != nil {
		return Profile{}, err
	}

	return Profile{profile_dir, users_list, settings, db}, nil
}

130
persistence/profile_test.go Normal file
View File

@ -0,0 +1,130 @@
package persistence_test
import (
"testing"
"os"
"path"
"errors"
"offline_twitter/persistence"
)
// DUPE 1: copy of the unexported helper of the same name in the `persistence`
// package, which this external test package cannot call.
//
// file_exists reports whether something exists at `path`. A "does not exist"
// error from os.Stat yields false; any other Stat error causes a panic.
func file_exists(path string) bool {
	_, stat_err := os.Stat(path)
	switch {
	case stat_err == nil:
		return true
	case errors.Is(stat_err, os.ErrNotExist):
		return false
	default:
		panic(stat_err)
	}
}
// isdir_map turns an "is directory" flag into the word used in test failure
// messages: "directory" when true, "file" when false.
func isdir_map(is_dir bool) string {
	kind := "file"
	if is_dir {
		kind = "directory"
	}
	return kind
}
func TestNewProfile(t *testing.T) {
profile_path := "test_profiles/TestNewProfile"
if !file_exists(profile_path) {
err := os.Mkdir(profile_path, 0755)
if err != nil {
panic(err)
}
}
contents, err := os.ReadDir(profile_path)
if err != nil {
panic(err)
}
if len(contents) != 0 {
t.Fatalf("test_profile not empty at start of test!")
}
profile, err := persistence.NewProfile(profile_path)
if err != nil {
t.Fatalf(err.Error())
}
if profile.ProfileDir != profile_path {
t.Errorf("ProfileDir should be %s, but it is %s", profile_path, profile.ProfileDir)
}
if len(profile.UsersList) != 0 {
t.Errorf("Expected empty users list, got %v instead", profile.UsersList)
}
// Check files were created
contents, err = os.ReadDir(profile_path)
if err != nil {
panic(err)
}
if len(contents) != 6 {
t.Fatalf("Expected 6 contents, got %d instead", len(contents))
}
expected_files := []struct {
filename string
isDir bool
} {
{"images", true},
{"profile_images", true},
{"settings.yaml", false},
{"twitter.db", false},
{"users.txt", false},
{"videos", true},
}
for i, v := range expected_files {
if contents[i].Name() != v.filename || contents[i].IsDir() != v.isDir {
t.Fatalf("Expected `%s` to be a %s, but got %s [%s]", v.filename, isdir_map(v.isDir), contents[i].Name(), isdir_map(contents[i].IsDir()))
}
}
}
func TestLoadProfile(t *testing.T) {
profile_path := "test_profiles/TestLoadProfile"
if !file_exists(profile_path) {
err := os.Mkdir(profile_path, 0755)
if err != nil {
panic(err)
}
}
contents, err := os.ReadDir(profile_path)
if err != nil {
panic(err)
}
if len(contents) != 0 {
t.Fatalf("test_profile not empty at start of test!")
}
_, err = persistence.NewProfile(profile_path)
if err != nil {
t.Fatalf(err.Error())
}
// Create some users
err = os.WriteFile(path.Join(profile_path, "users.txt"), []byte("user1\nuser2\n"), 0644)
if err != nil {
t.Fatalf(err.Error())
}
profile, err := persistence.LoadProfile(profile_path)
if err != nil {
t.Fatalf(err.Error())
}
if profile.ProfileDir != profile_path {
t.Errorf("Expected profile path to be %q, but got %q", profile_path, profile.ProfileDir)
}
if len(profile.UsersList) != 2 {
t.Errorf("Expected 2 users, got %v", profile.UsersList)
}
}

71
persistence/schema.sql Normal file
View File

@ -0,0 +1,71 @@
-- Turn on foreign key enforcement. In SQLite this is off by default and is a
-- per-connection setting, so it applies to the connection that executes this
-- script rather than being stored in the database file.
PRAGMA foreign_keys = on;
-- One row per user. `rowid` is the local SQLite primary key; `id` and `handle`
-- are each unique. `id` is the column that other tables' foreign keys point at
-- (presumably the Twitter-assigned user ID -- confirm against the scraper).
create table users (rowid integer primary key,
id integer unique not null,
display_name text not null,
handle text unique not null,
bio text,
following_count integer not null,
followers_count integer not null,
location text,
website text,
join_date integer, -- NOTE(review): stored as an integer, presumably a Unix timestamp -- confirm
is_private boolean default 0,
is_verified boolean default 0,
profile_image_url text,
banner_image_url text,
pinned_tweet integer -- no foreign key is declared for this column
);
-- One row per tweet. `rowid` is the local SQLite primary key; `id` is unique
-- and is the column that other tables' foreign keys point at.
create table tweets (rowid integer primary key,
id integer unique not null,
user integer not null, -- references users(id)
text text not null,
posted_at integer, -- NOTE(review): stored as an integer, presumably a Unix timestamp -- confirm
num_likes integer,
num_retweets integer,
num_replies integer,
num_quote_tweets integer,
has_video boolean,
in_reply_to integer, -- references tweets(id); nullable
quoted_tweet integer, -- references tweets(id); nullable
mentions text, -- comma-separated
hashtags text, -- comma-separated
foreign key(user) references users(id),
foreign key(in_reply_to) references tweets(id),
foreign key(quoted_tweet) references tweets(id)
);
-- One row per retweet: which tweet was retweeted (`tweet_id`), by which user
-- (`retweeted_by`), and when (`retweeted_at`, stored as an integer).
-- The table constraints are comma-separated as standard SQL requires; SQLite
-- happens to accept them without the comma, but other tools may not.
create table retweets(rowid integer primary key,
    retweet_id integer not null,
    tweet_id integer not null,
    retweeted_by integer not null,
    retweeted_at integer not null,
    foreign key(tweet_id) references tweets(id),
    foreign key(retweeted_by) references users(id)
);
-- URLs attached to a tweet; a given URL text is stored at most once per tweet.
-- The table constraints are comma-separated as standard SQL requires; SQLite
-- happens to accept them without the comma, but other tools may not.
create table urls (rowid integer primary key,
    tweet_id integer not null,
    text text not null,
    unique (tweet_id, text),
    foreign key(tweet_id) references tweets(id)
);
-- Images attached to a tweet; a given filename is stored at most once per tweet.
-- The table constraints are comma-separated as standard SQL requires; SQLite
-- happens to accept them without the comma, but other tools may not.
create table images (rowid integer primary key,
    tweet_id integer not null,
    filename text not null,
    unique (tweet_id, filename),
    foreign key(tweet_id) references tweets(id)
);
-- Hashtags attached to a tweet; a given hashtag text is stored at most once per tweet.
-- The table constraints are comma-separated as standard SQL requires; SQLite
-- happens to accept them without the comma, but other tools may not.
create table hashtags (rowid integer primary key,
    tweet_id integer not null,
    text text not null,
    unique (tweet_id, text),
    foreign key(tweet_id) references tweets(id)
);

34
persistence/utils.go Normal file
View File

@ -0,0 +1,34 @@
package persistence
import (
"errors"
"os"
"strings"
"offline_twitter/scraper"
)
// DUPE 1: an identical helper is duplicated in this package's external test file.
//
// file_exists reports whether something exists at `path`. A "does not exist"
// error from os.Stat yields false; any other Stat error causes a panic.
func file_exists(path string) bool {
	if _, stat_err := os.Stat(path); stat_err != nil {
		if !errors.Is(stat_err, os.ErrNotExist) {
			panic(stat_err)
		}
		return false
	}
	return true
}
// parse_users_file turns the raw contents of a users file into a list of user
// handles, one handle per line.
//
// Surrounding whitespace is trimmed from every line, so files with Windows
// (CRLF) line endings or stray spaces do not produce handles containing "\r"
// or blanks. Lines that are empty after trimming are skipped. The result is
// never nil; input without any handles yields an empty slice.
func parse_users_file(data []byte) []scraper.UserHandle {
	ret := []scraper.UserHandle{}
	for _, line := range strings.Split(string(data), "\n") {
		handle := strings.TrimSpace(line)
		if handle == "" {
			continue
		}
		ret = append(ret, scraper.UserHandle(handle))
	}
	return ret
}