Make user handles only unique among active users

This commit is contained in:
Alessio 2024-09-17 18:30:11 -07:00
parent 8c390444d6
commit 79033cfc79
5 changed files with 103 additions and 11 deletions

View File

@ -360,7 +360,7 @@ tw fetch_tweet_only https://twitter.com/PandasAndVidya/status/156271472796842803
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 156271472796842803")" == "0" test "$(sqlite3 twitter.db "select count(*) from tweets where id = 156271472796842803")" == "0"
# Test that you can pass a session with the `.session` file extension too # Test that you can pass a session with the `.session` file extension too
tw --session Offline_Twatter.session list_followed > /dev/null # Dummy operation tw --session Offline_Twatter.session fetch_user Offline_Twatter # Dummy operation
# Test search # Test search

View File

@ -425,7 +425,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
// From, to, by, and RT'd by user handles // From, to, by, and RT'd by user handles
if c.FromUserHandle != "" { if c.FromUserHandle != "" {
where_clauses = append(where_clauses, "tweets.user_id = (select id from users where handle like ?)") where_clauses = append(where_clauses, "tweets.user_id = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.FromUserHandle) bind_values = append(bind_values, c.FromUserHandle)
} }
for _, to_user := range c.ToUserHandles { for _, to_user := range c.ToUserHandles {
@ -433,11 +433,11 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
bind_values = append(bind_values, fmt.Sprintf("%%%s%%", to_user)) bind_values = append(bind_values, fmt.Sprintf("%%%s%%", to_user))
} }
if c.RetweetedByUserHandle != "" { if c.RetweetedByUserHandle != "" {
where_clauses = append(where_clauses, "retweeted_by = (select id from users where handle like ?)") where_clauses = append(where_clauses, "retweeted_by = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.RetweetedByUserHandle) bind_values = append(bind_values, c.RetweetedByUserHandle)
} }
if c.ByUserHandle != "" { if c.ByUserHandle != "" {
where_clauses = append(where_clauses, "by_user_id = (select id from users where handle like ?)") where_clauses = append(where_clauses, "by_user_id = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.ByUserHandle) bind_values = append(bind_values, c.ByUserHandle)
} }
if c.ListID != 0 { if c.ListID != 0 {
@ -446,7 +446,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
} }
if c.FollowedByUserHandle != "" { if c.FollowedByUserHandle != "" {
where_clauses = append(where_clauses, where_clauses = append(where_clauses,
"by_user_id in (select followee_id from follows where follower_id = (select id from users where handle like ?))") "by_user_id in (select followee_id from follows where follower_id = (select id from users_by_handle where handle like ?))")
bind_values = append(bind_values, c.FollowedByUserHandle) bind_values = append(bind_values, c.FollowedByUserHandle)
} }
@ -532,7 +532,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
likes_sort_order_field := "" likes_sort_order_field := ""
if c.LikedByUserHandle != "" { if c.LikedByUserHandle != "" {
liked_by_filter_join_clause = " join likes filter_likes on tweets.id = filter_likes.tweet_id " liked_by_filter_join_clause = " join likes filter_likes on tweets.id = filter_likes.tweet_id "
where_clauses = append(where_clauses, "filter_likes.user_id = (select id from users where handle like ?) ") where_clauses = append(where_clauses, "filter_likes.user_id = (select id from users_by_handle where handle like ?) ")
bind_values = append(bind_values, c.LikedByUserHandle) bind_values = append(bind_values, c.LikedByUserHandle)
likes_sort_order_field = ", coalesce(filter_likes.sort_order, -1) likes_sort_order " likes_sort_order_field = ", coalesce(filter_likes.sort_order, -1) likes_sort_order "
@ -545,7 +545,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
bookmarks_sort_order_field := "" bookmarks_sort_order_field := ""
if c.BookmarkedByUserHandle != "" { if c.BookmarkedByUserHandle != "" {
bookmarked_by_filter_join_clause = " join bookmarks filter_bookmarks on tweets.id = filter_bookmarks.tweet_id " bookmarked_by_filter_join_clause = " join bookmarks filter_bookmarks on tweets.id = filter_bookmarks.tweet_id "
where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users where handle like ?) ") where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users_by_handle where handle like ?) ")
bind_values = append(bind_values, c.BookmarkedByUserHandle) bind_values = append(bind_values, c.BookmarkedByUserHandle)
bookmarks_sort_order_field = ", coalesce(filter_bookmarks.sort_order, -1) bookmarks_sort_order " bookmarks_sort_order_field = ", coalesce(filter_bookmarks.sort_order, -1) bookmarks_sort_order "

View File

@ -7,7 +7,7 @@ PRAGMA foreign_keys = on;
create table users (rowid integer primary key, create table users (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'), id integer unique not null check(typeof(id) = 'integer'),
display_name text not null, display_name text not null,
handle text unique not null, handle text not null,
bio text, bio text,
following_count integer, following_count integer,
followers_count integer, followers_count integer,
@ -28,6 +28,29 @@ create table users (rowid integer primary key,
is_id_fake boolean default 0, is_id_fake boolean default 0,
is_content_downloaded boolean default 0 is_content_downloaded boolean default 0
); );
create unique index index_active_users_handle_unique on users (handle) where is_banned = 0 and is_deleted = 0;
create view users_by_handle as
with active_users as (
select * from users where is_banned = 0 and is_deleted = 0
), inactive_users as (
select * from users where is_banned = 1 or is_deleted = 1
), inactive_users_with_no_shadowing_active_user as (
select * from inactive_users where not exists (
-- Ensure no active user exists for this handle
select 1 from active_users where active_users.handle = inactive_users.handle
)
)
select * from users
where is_banned = 0 and is_deleted = 0 -- select active users directly
or users.id = (
-- select the inactive user with the highest ID if no active user exists for the handle
select max(id)
from inactive_users_with_no_shadowing_active_user
where users.handle = inactive_users_with_no_shadowing_active_user.handle
)
group by handle having id = max(id);
create table fake_user_sequence(latest_fake_id integer not null); create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000); insert into fake_user_sequence values(0x4000000000000000);

View File

@ -23,7 +23,8 @@ const USERS_ALL_SQL_FIELDS = `
// - u: the User // - u: the User
func (p Profile) SaveUser(u *scraper.User) error { func (p Profile) SaveUser(u *scraper.User) error {
if u.IsNeedingFakeID { if u.IsNeedingFakeID {
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID) // User is fake; check if we already have them, in order to proceed
err := p.DB.QueryRow("select id from users_by_handle where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
if errors.Is(err, sql.ErrNoRows) { if errors.Is(err, sql.ErrNoRows) {
// We need to continue-- create a new fake user // We need to continue-- create a new fake user
u.ID = p.NextFakeUserID() u.ID = p.NextFakeUserID()
@ -86,7 +87,7 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error
var ret scraper.User var ret scraper.User
err := db.Get(&ret, ` err := db.Get(&ret, `
select `+USERS_ALL_SQL_FIELDS+` select `+USERS_ALL_SQL_FIELDS+`
from users from users_by_handle
where lower(handle) = lower(?) where lower(handle) = lower(?)
`, handle) `, handle)

View File

@ -21,8 +21,9 @@ Please upgrade this application to a newer version to use this profile. Or down
) )
} }
// The Nth entry is the migration that moves you from version N to version N+1. // Database starts at version 0. First migration brings us to version 1
var MIGRATIONS = []string{ var MIGRATIONS = []string{
// Version 1
`create table polls (rowid integer primary key, `create table polls (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'), id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null, tweet_id integer not null,
@ -297,6 +298,7 @@ var MIGRATIONS = []string{
foreign key(chat_message_id) references chat_messages(id) foreign key(chat_message_id) references chat_messages(id)
); );
create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`, create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`,
// 30
`create table bookmarks(rowid integer primary key, `create table bookmarks(rowid integer primary key,
sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
user_id integer not null, user_id integer not null,
@ -354,6 +356,72 @@ var MIGRATIONS = []string{
user_id not null references users(id), user_id not null references users(id),
unique(notification_id, user_id) unique(notification_id, user_id)
);`, );`,
`pragma foreign_keys = OFF;
begin exclusive transaction;
create table users_new (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
display_name text not null,
handle text not null,
bio text,
following_count integer,
followers_count integer,
location text,
website text,
join_date integer,
is_private boolean default 0,
is_verified boolean default 0,
is_banned boolean default 0,
is_deleted boolean default 0,
profile_image_url text,
profile_image_local_path text,
banner_image_url text,
banner_image_local_path text,
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
is_followed boolean default 0,
is_id_fake boolean default 0,
is_content_downloaded boolean default 0
);
create unique index index_active_users_handle_unique on users_new (handle) where is_banned = 0 and is_deleted = 0;
INSERT INTO users_new (rowid, id, display_name, handle, bio, following_count, followers_count, location,
website, join_date, is_private, is_verified, is_banned, is_deleted, profile_image_url,
profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_followed,
is_id_fake, is_content_downloaded)
SELECT rowid, id, display_name, handle, bio, following_count, followers_count, location,
website, join_date, is_private, is_verified, is_banned, is_deleted, profile_image_url,
profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_followed,
is_id_fake, is_content_downloaded
FROM users;
drop table users;
alter table users_new rename to users;
create view users_by_handle as
with active_users as (
select * from users where is_banned = 0 and is_deleted = 0
), inactive_users as (
select * from users where is_banned = 1 or is_deleted = 1
), inactive_users_with_no_shadowing_active_user as (
select * from inactive_users where not exists (
-- Ensure no active user exists for this handle
select 1 from active_users where active_users.handle = inactive_users.handle
)
)
select * from users
where is_banned = 0 and is_deleted = 0 -- select active users directly
or users.id = (
-- select the inactive user with the highest ID if no active user exists for the handle
select max(id)
from inactive_users_with_no_shadowing_active_user
where users.handle = inactive_users_with_no_shadowing_active_user.handle
)
group by handle having id = max(id);
commit;
pragma foreign_keys = ON;
vacuum;
`,
} }
var ENGINE_DATABASE_VERSION = len(MIGRATIONS) var ENGINE_DATABASE_VERSION = len(MIGRATIONS)