Make user handles only unique among active users

This commit is contained in:
Alessio 2024-09-17 18:30:11 -07:00
parent 8c390444d6
commit 79033cfc79
5 changed files with 103 additions and 11 deletions

View File

@ -360,7 +360,7 @@ tw fetch_tweet_only https://twitter.com/PandasAndVidya/status/156271472796842803
test "$(sqlite3 twitter.db "select count(*) from tweets where id = 156271472796842803")" == "0"
# Test that you can pass a session with the `.session` file extension too
tw --session Offline_Twatter.session list_followed > /dev/null # Dummy operation
tw --session Offline_Twatter.session fetch_user Offline_Twatter # Dummy operation
# Test search

View File

@ -425,7 +425,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
// From, to, by, and RT'd by user handles
if c.FromUserHandle != "" {
where_clauses = append(where_clauses, "tweets.user_id = (select id from users where handle like ?)")
where_clauses = append(where_clauses, "tweets.user_id = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.FromUserHandle)
}
for _, to_user := range c.ToUserHandles {
@ -433,11 +433,11 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
bind_values = append(bind_values, fmt.Sprintf("%%%s%%", to_user))
}
if c.RetweetedByUserHandle != "" {
where_clauses = append(where_clauses, "retweeted_by = (select id from users where handle like ?)")
where_clauses = append(where_clauses, "retweeted_by = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.RetweetedByUserHandle)
}
if c.ByUserHandle != "" {
where_clauses = append(where_clauses, "by_user_id = (select id from users where handle like ?)")
where_clauses = append(where_clauses, "by_user_id = (select id from users_by_handle where handle like ?)")
bind_values = append(bind_values, c.ByUserHandle)
}
if c.ListID != 0 {
@ -446,7 +446,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
}
if c.FollowedByUserHandle != "" {
where_clauses = append(where_clauses,
"by_user_id in (select followee_id from follows where follower_id = (select id from users where handle like ?))")
"by_user_id in (select followee_id from follows where follower_id = (select id from users_by_handle where handle like ?))")
bind_values = append(bind_values, c.FollowedByUserHandle)
}
@ -532,7 +532,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
likes_sort_order_field := ""
if c.LikedByUserHandle != "" {
liked_by_filter_join_clause = " join likes filter_likes on tweets.id = filter_likes.tweet_id "
where_clauses = append(where_clauses, "filter_likes.user_id = (select id from users where handle like ?) ")
where_clauses = append(where_clauses, "filter_likes.user_id = (select id from users_by_handle where handle like ?) ")
bind_values = append(bind_values, c.LikedByUserHandle)
likes_sort_order_field = ", coalesce(filter_likes.sort_order, -1) likes_sort_order "
@ -545,7 +545,7 @@ func (p Profile) NextPage(c Cursor, current_user_id scraper.UserID) (Feed, error
bookmarks_sort_order_field := ""
if c.BookmarkedByUserHandle != "" {
bookmarked_by_filter_join_clause = " join bookmarks filter_bookmarks on tweets.id = filter_bookmarks.tweet_id "
where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users where handle like ?) ")
where_clauses = append(where_clauses, "filter_bookmarks.user_id = (select id from users_by_handle where handle like ?) ")
bind_values = append(bind_values, c.BookmarkedByUserHandle)
bookmarks_sort_order_field = ", coalesce(filter_bookmarks.sort_order, -1) bookmarks_sort_order "

View File

@ -7,7 +7,7 @@ PRAGMA foreign_keys = on;
create table users (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
display_name text not null,
handle text unique not null,
handle text not null,
bio text,
following_count integer,
followers_count integer,
@ -28,6 +28,29 @@ create table users (rowid integer primary key,
is_id_fake boolean default 0,
is_content_downloaded boolean default 0
);
create unique index index_active_users_handle_unique on users (handle) where is_banned = 0 and is_deleted = 0;
create view users_by_handle as
with active_users as (
select * from users where is_banned = 0 and is_deleted = 0
), inactive_users as (
select * from users where is_banned = 1 or is_deleted = 1
), inactive_users_with_no_shadowing_active_user as (
select * from inactive_users where not exists (
-- Ensure no active user exists for this handle
select 1 from active_users where active_users.handle = inactive_users.handle
)
)
select * from users
where is_banned = 0 and is_deleted = 0 -- select active users directly
or users.id = (
-- select the inactive user with the highest ID if no active user exists for the handle
select max(id)
from inactive_users_with_no_shadowing_active_user
where users.handle = inactive_users_with_no_shadowing_active_user.handle
)
group by handle having id = max(id);
create table fake_user_sequence(latest_fake_id integer not null);
insert into fake_user_sequence values(0x4000000000000000);

View File

@ -23,7 +23,8 @@ const USERS_ALL_SQL_FIELDS = `
// - u: the User
func (p Profile) SaveUser(u *scraper.User) error {
if u.IsNeedingFakeID {
err := p.DB.QueryRow("select id from users where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
// User is fake; check if we already have them, in order to proceed
err := p.DB.QueryRow("select id from users_by_handle where lower(handle) = lower(?)", u.Handle).Scan(&u.ID)
if errors.Is(err, sql.ErrNoRows) {
// We need to continue-- create a new fake user
u.ID = p.NextFakeUserID()
@ -86,7 +87,7 @@ func (p Profile) GetUserByHandle(handle scraper.UserHandle) (scraper.User, error
var ret scraper.User
err := db.Get(&ret, `
select `+USERS_ALL_SQL_FIELDS+`
from users
from users_by_handle
where lower(handle) = lower(?)
`, handle)

View File

@ -21,8 +21,9 @@ Please upgrade this application to a newer version to use this profile. Or down
)
}
// The Nth entry is the migration that moves you from version N to version N+1.
// Database starts at version 0. First migration brings us to version 1
var MIGRATIONS = []string{
// Version 1
`create table polls (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
tweet_id integer not null,
@ -297,6 +298,7 @@ var MIGRATIONS = []string{
foreign key(chat_message_id) references chat_messages(id)
);
create index if not exists index_chat_message_urls_chat_message_id on chat_message_urls (chat_message_id);`,
// 30
`create table bookmarks(rowid integer primary key,
sort_order integer not null, -- Can't be unique because "-1" is used as "unknown" value
user_id integer not null,
@ -354,6 +356,72 @@ var MIGRATIONS = []string{
user_id not null references users(id),
unique(notification_id, user_id)
);`,
`pragma foreign_keys = OFF;
begin exclusive transaction;
create table users_new (rowid integer primary key,
id integer unique not null check(typeof(id) = 'integer'),
display_name text not null,
handle text not null,
bio text,
following_count integer,
followers_count integer,
location text,
website text,
join_date integer,
is_private boolean default 0,
is_verified boolean default 0,
is_banned boolean default 0,
is_deleted boolean default 0,
profile_image_url text,
profile_image_local_path text,
banner_image_url text,
banner_image_local_path text,
pinned_tweet_id integer check(typeof(pinned_tweet_id) = 'integer' or pinned_tweet_id = ''),
is_followed boolean default 0,
is_id_fake boolean default 0,
is_content_downloaded boolean default 0
);
create unique index index_active_users_handle_unique on users_new (handle) where is_banned = 0 and is_deleted = 0;
INSERT INTO users_new (rowid, id, display_name, handle, bio, following_count, followers_count, location,
website, join_date, is_private, is_verified, is_banned, is_deleted, profile_image_url,
profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_followed,
is_id_fake, is_content_downloaded)
SELECT rowid, id, display_name, handle, bio, following_count, followers_count, location,
website, join_date, is_private, is_verified, is_banned, is_deleted, profile_image_url,
profile_image_local_path, banner_image_url, banner_image_local_path, pinned_tweet_id, is_followed,
is_id_fake, is_content_downloaded
FROM users;
drop table users;
alter table users_new rename to users;
create view users_by_handle as
with active_users as (
select * from users where is_banned = 0 and is_deleted = 0
), inactive_users as (
select * from users where is_banned = 1 or is_deleted = 1
), inactive_users_with_no_shadowing_active_user as (
select * from inactive_users where not exists (
-- Ensure no active user exists for this handle
select 1 from active_users where active_users.handle = inactive_users.handle
)
)
select * from users
where is_banned = 0 and is_deleted = 0 -- select active users directly
or users.id = (
-- select the inactive user with the highest ID if no active user exists for the handle
select max(id)
from inactive_users_with_no_shadowing_active_user
where users.handle = inactive_users_with_no_shadowing_active_user.handle
)
group by handle having id = max(id);
commit;
pragma foreign_keys = ON;
vacuum;
`,
}
var ENGINE_DATABASE_VERSION = len(MIGRATIONS)