From 545346e635988981a9ba2e91c6cce5121faf22dc Mon Sep 17 00:00:00 2001 From: Alessio Date: Sun, 27 Jun 2021 13:31:30 -0700 Subject: [PATCH] Add persistence module --- .build.yml | 24 +++++- persistence/.gitignore | 1 + persistence/profile.go | 143 ++++++++++++++++++++++++++++++++++++ persistence/profile_test.go | 130 ++++++++++++++++++++++++++++++++ persistence/schema.sql | 71 ++++++++++++++++++ persistence/utils.go | 34 +++++++++ 6 files changed, 400 insertions(+), 3 deletions(-) create mode 100644 persistence/.gitignore create mode 100644 persistence/profile.go create mode 100644 persistence/profile_test.go create mode 100644 persistence/schema.sql create mode 100644 persistence/utils.go diff --git a/.build.yml b/.build.yml index 87bb7cf..6bfe8cb 100644 --- a/.build.yml +++ b/.build.yml @@ -23,10 +23,16 @@ tasks: duration=$SECONDS echo "Task completed in $(($duration / 60))m$(($duration % 60))s." - - run_tests: | + - test_scraper: | cd twitter_offline_engine/scraper - go test -bench=. + go test -bench=. -cover + + - test_persistence: | + cd twitter_offline_engine/persistence + + mkdir test_profiles/ + go test -bench=. -cover - install_golangci-lint: | SECONDS=0 @@ -37,7 +43,7 @@ tasks: duration=$SECONDS echo "Task completed in $(($duration / 60))m$(($duration % 60))s." - - run_lint: | + - lint_scraper: | SECONDS=0 cd twitter_offline_engine/scraper @@ -48,3 +54,15 @@ tasks: duration=$SECONDS echo "Task completed in $(($duration / 60))m$(($duration % 60))s." + + - lint_persistence: | + SECONDS=0 + + cd twitter_offline_engine/persistence + golangci-lint run + + cd ../cmd + golangci-lint run + + duration=$SECONDS + echo "Task completed in $(($duration / 60))m$(($duration % 60))s." 
diff --git a/persistence/.gitignore b/persistence/.gitignore
new file mode 100644
index 0000000..060e271
--- /dev/null
+++ b/persistence/.gitignore
@@ -0,0 +1 @@
+test_profiles
diff --git a/persistence/profile.go b/persistence/profile.go
new file mode 100644
index 0000000..efbb067
--- /dev/null
+++ b/persistence/profile.go
@@ -0,0 +1,172 @@
+package persistence
+
+import (
+	"database/sql"
+	_ "embed"
+	"fmt"
+	"os"
+	"path"
+
+	_ "github.com/mattn/go-sqlite3"
+	"gopkg.in/yaml.v2"
+
+	"offline_twitter/scraper"
+)
+
+// sql_init holds the contents of schema.sql, embedded at build time; NewProfile executes it
+// against the newly created database.
+//
+//go:embed schema.sql
+var sql_init string
+
+// Settings is the per-profile configuration stored in `settings.yaml`.  It has no fields yet.
+type Settings struct{}
+
+// Profile describes a profile directory together with its open database handle.
+type Profile struct {
+	ProfileDir string               // directory that was passed to NewProfile / LoadProfile
+	UsersList  []scraper.UserHandle // handles listed in `users.txt`
+	Settings   Settings             // contents of `settings.yaml`
+	DB         *sql.DB              // handle for `twitter.db`
+}
+
+// NewProfile creates a new profile in the given location.
+//
+// `target_dir` is a directory; it is not created here, so it has to exist already.  If any of
+// the profile's files or subdirectories is already present in it, an error is returned before
+// anything is written.  If a later step fails, whatever earlier steps created stays on disk.
+//
+// On success the returned Profile carries an open database handle.
+func NewProfile(target_dir string) (Profile, error) {
+	user_list_file := path.Join(target_dir, "users.txt")
+	settings_file := path.Join(target_dir, "settings.yaml")
+	sqlite_file := path.Join(target_dir, "twitter.db")
+	profile_images_dir := path.Join(target_dir, "profile_images")
+	images_dir := path.Join(target_dir, "images")
+	videos_dir := path.Join(target_dir, "videos")
+
+	// Refuse to proceed if any part of a profile is already present.
+	for _, file := range []string{
+		user_list_file,
+		settings_file,
+		sqlite_file,
+		profile_images_dir,
+		images_dir,
+		videos_dir,
+	} {
+		if file_exists(file) {
+			return Profile{}, fmt.Errorf("File already exists: %s", file)
+		}
+	}
+
+	// Create `twitter.db`
+	fmt.Printf("Creating %s\n", sqlite_file)
+	db, err := sql.Open("sqlite3", sqlite_file)
+	if err != nil {
+		return Profile{}, err
+	}
+	// The handle is only handed to the caller on success; every failure path below must
+	// release it, otherwise it would leak.
+	succeeded := false
+	defer func() {
+		if !succeeded {
+			_ = db.Close() // best effort; the error that caused the failure is the one returned
+		}
+	}()
+
+	_, err = db.Exec(sql_init)
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Create `users.txt`
+	fmt.Printf("Creating %s\n", user_list_file)
+	err = os.WriteFile(user_list_file, []byte{}, os.FileMode(0644))
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Create `settings.yaml`
+	fmt.Printf("Creating %s\n", settings_file)
+	settings := Settings{}
+	data, err := yaml.Marshal(&settings)
+	if err != nil {
+		return Profile{}, err
+	}
+	err = os.WriteFile(settings_file, data, os.FileMode(0644))
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Create `profile_images`
+	fmt.Printf("Creating %s/\n", profile_images_dir)
+	err = os.Mkdir(profile_images_dir, os.FileMode(0755))
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Create `images`
+	fmt.Printf("Creating %s/\n", images_dir)
+	err = os.Mkdir(images_dir, os.FileMode(0755))
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Create `videos`
+	fmt.Printf("Creating %s/\n", videos_dir)
+	err = os.Mkdir(videos_dir, os.FileMode(0755))
+	if err != nil {
+		return Profile{}, err
+	}
+
+	succeeded = true
+	return Profile{target_dir, []scraper.UserHandle{}, settings, db}, nil
+}
+
+// LoadProfile opens the existing profile stored in `profile_dir`.
+//
+// It returns an error if `users.txt`, `settings.yaml` or `twitter.db` is missing, or if the
+// users list or the settings cannot be read.  The media subdirectories are not checked.
+//
+// NOTE(review): schema.sql issues `PRAGMA foreign_keys = on` only when NewProfile runs it.
+// SQLite documents that pragma as a per-connection setting, so a handle opened here
+// presumably does not enforce foreign keys -- confirm whether that is intended.
+func LoadProfile(profile_dir string) (Profile, error) {
+	user_list_file := path.Join(profile_dir, "users.txt")
+	settings_file := path.Join(profile_dir, "settings.yaml")
+	sqlite_file := path.Join(profile_dir, "twitter.db")
+
+	for _, file := range []string{
+		user_list_file,
+		settings_file,
+		sqlite_file,
+	} {
+		if !file_exists(file) {
+			return Profile{}, fmt.Errorf("Invalid profile, could not find file: %s", file)
+		}
+	}
+
+	users_data, err := os.ReadFile(user_list_file)
+	if err != nil {
+		return Profile{}, err
+	}
+	users_list := parse_users_file(users_data)
+
+	settings_data, err := os.ReadFile(settings_file)
+	if err != nil {
+		return Profile{}, err
+	}
+	settings := Settings{}
+	err = yaml.Unmarshal(settings_data, &settings)
+	if err != nil {
+		return Profile{}, err
+	}
+
+	// Opened last, so none of the error returns above can leave a handle behind.
+	db, err := sql.Open("sqlite3", sqlite_file)
+	if err != nil {
+		return Profile{}, err
+	}
+
+	return Profile{profile_dir, users_list, settings, db}, nil
+}
diff --git a/persistence/profile_test.go b/persistence/profile_test.go
new file mode 100644
index 0000000..896d3d2
--- /dev/null
+++ b/persistence/profile_test.go
@@ -0,0 +1,141 @@
+package persistence_test
+
+import (
+	"errors"
+	"os"
+	"path"
+	"testing"
+
+	"offline_twitter/persistence"
+)
+
+// DUPE 1
+//
+// file_exists reports whether anything exists at `path`.  It panics if os.Stat fails for any
+// reason other than the path not existing.
+func file_exists(path string) bool {
+	_, err := os.Stat(path)
+	if err == nil {
+		return true
+	} else if errors.Is(err, os.ErrNotExist) {
+		return false
+	} else {
+		panic(err)
+	}
+}
+
+// isdir_map names the kind of a directory entry for use in failure messages.
+func isdir_map(is_dir bool) string {
+	if is_dir {
+		return "directory"
+	}
+	return "file"
+}
+
+// TestNewProfile creates a profile in an empty directory and checks the returned struct and
+// the files that were created.  It expects `test_profiles/` to exist in the working directory.
+func TestNewProfile(t *testing.T) {
+	profile_path := "test_profiles/TestNewProfile"
+	if !file_exists(profile_path) {
+		err := os.Mkdir(profile_path, 0755)
+		if err != nil {
+			panic(err)
+		}
+	}
+
+	contents, err := os.ReadDir(profile_path)
+	if err != nil {
+		panic(err)
+	}
+	if len(contents) != 0 {
+		t.Fatalf("test_profile not empty at start of test!")
+	}
+
+	profile, err := persistence.NewProfile(profile_path)
+	if err != nil {
+		// t.Fatal rather than t.Fatalf(err.Error()): the error text must not be used as a format string
+		t.Fatal(err)
+	}
+	defer profile.DB.Close()
+
+	if profile.ProfileDir != profile_path {
+		t.Errorf("ProfileDir should be %s, but it is %s", profile_path, profile.ProfileDir)
+	}
+	if len(profile.UsersList) != 0 {
+		t.Errorf("Expected empty users list, got %v instead", profile.UsersList)
+	}
+
+	// Check files were created
+	contents, err = os.ReadDir(profile_path)
+	if err != nil {
+		panic(err)
+	}
+	if len(contents) != 6 {
+		t.Fatalf("Expected 6 contents, got %d instead", len(contents))
+	}
+
+	// os.ReadDir returns entries sorted by filename, so the expectations are listed in that order.
+	expected_files := []struct {
+		filename string
+		isDir    bool
+	}{
+		{"images", true},
+		{"profile_images", true},
+		{"settings.yaml", false},
+		{"twitter.db", false},
+		{"users.txt", false},
+		{"videos", true},
+	}
+
+	for i, v := range expected_files {
+		if contents[i].Name() != v.filename || contents[i].IsDir() != v.isDir {
+			t.Fatalf("Expected `%s` to be a %s, but got %s [%s]", v.filename, isdir_map(v.isDir), contents[i].Name(), isdir_map(contents[i].IsDir()))
+		}
+	}
+}
+
+// TestLoadProfile creates a profile, overwrites its `users.txt` with two handles, and checks
+// that LoadProfile reads them back.  It expects `test_profiles/` to exist in the working directory.
+func TestLoadProfile(t *testing.T) {
+	profile_path := "test_profiles/TestLoadProfile"
+	if !file_exists(profile_path) {
+		err := os.Mkdir(profile_path, 0755)
+		if err != nil {
+			panic(err)
+		}
+	}
+
+	contents, err := os.ReadDir(profile_path)
+	if err != nil {
+		panic(err)
+	}
+	if len(contents) != 0 {
+		t.Fatalf("test_profile not empty at start of test!")
+	}
+
+	new_profile, err := persistence.NewProfile(profile_path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer new_profile.DB.Close()
+
+	// Create some users
+	err = os.WriteFile(path.Join(profile_path, "users.txt"), []byte("user1\nuser2\n"), 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	profile, err := persistence.LoadProfile(profile_path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer profile.DB.Close()
+
+	if profile.ProfileDir != profile_path {
+		t.Errorf("Expected profile path to be %q, but got %q", profile_path, profile.ProfileDir)
+	}
+
+	if len(profile.UsersList) != 2 {
+		t.Errorf("Expected 2 users, got %v", profile.UsersList)
+	}
+}
diff --git a/persistence/schema.sql b/persistence/schema.sql
new file mode 100644
index 0000000..1758a2a
--- /dev/null
+++ b/persistence/schema.sql
@@ -0,0 +1,73 @@
+-- `foreign_keys` is a per-connection setting in SQLite: it only affects the connection that
+-- executes this script.
+PRAGMA foreign_keys = on;
+
+create table users (rowid integer primary key,
+    id integer unique not null,
+    display_name text not null,
+    handle text unique not null,
+    bio text,
+    following_count integer not null,
+    followers_count integer not null,
+    location text,
+    website text,
+    join_date integer,
+    is_private boolean default 0,
+    is_verified boolean default 0,
+    profile_image_url text,
+    banner_image_url text,
+    pinned_tweet integer
+);
+
+create table tweets (rowid integer primary key,
+    id integer unique not null,
+    user integer not null,
+    text text not null,
+    posted_at integer,
+    num_likes integer,
+    num_retweets integer,
+    num_replies integer,
+    num_quote_tweets integer,
+    has_video boolean,
+    in_reply_to integer,
+    quoted_tweet integer,
+    mentions text,  -- comma-separated
+    hashtags text,  -- comma-separated
+
+    foreign key(user) references users(id),
+    foreign key(in_reply_to) references tweets(id),
+    foreign key(quoted_tweet) references tweets(id)
+);
+
+create table retweets(rowid integer primary key,
+    retweet_id integer not null,
+    tweet_id integer not null,
+    retweeted_by integer not null,
+    retweeted_at integer not null,
+    foreign key(tweet_id) references tweets(id),
+    foreign key(retweeted_by) references users(id)
+);
+
+create table urls (rowid integer primary key,
+    tweet_id integer not null,
+    text text not null,
+
+    unique (tweet_id, text),
+    foreign key(tweet_id) references tweets(id)
+);
+
+create table images (rowid integer primary key,
+    tweet_id integer not null,
+    filename text not null,
+
+    unique (tweet_id, filename),
+    foreign key(tweet_id) references tweets(id)
+);
+
+create table hashtags (rowid integer primary key,
+    tweet_id integer not null,
+    text text not null,
+
+    unique (tweet_id, text),
+    foreign key(tweet_id) references tweets(id)
+);
diff --git a/persistence/utils.go b/persistence/utils.go
new file mode 100644
index 0000000..f0d374f
--- /dev/null
+++ b/persistence/utils.go
@@ -0,0 +1,39 @@
+package persistence
+
+import (
+	"errors"
+	"os"
+	"strings"
+
+	"offline_twitter/scraper"
+)
+
+// DUPE 1
+//
+// file_exists reports whether anything exists at `path`.  It panics if os.Stat fails for any
+// reason other than the path not existing.
+func file_exists(path string) bool {
+	_, err := os.Stat(path)
+	if err == nil {
+		return true
+	} else if errors.Is(err, os.ErrNotExist) {
+		return false
+	} else {
+		panic(err)
+	}
+}
+
+// parse_users_file extracts the user handles from the contents of a `users.txt` file, one
+// handle per line.  Surrounding whitespace (including the `\r` of Windows line endings) is
+// stripped from each line, and lines that are empty after stripping are skipped.  The result is
+// never nil.
+func parse_users_file(data []byte) []scraper.UserHandle {
+	ret := []scraper.UserHandle{}
+	for _, line := range strings.Split(string(data), "\n") {
+		handle := strings.TrimSpace(line)
+		if handle != "" {
+			ret = append(ret, scraper.UserHandle(handle))
+		}
+	}
+	return ret
+}