Skip to content

Commit 9efccbd

Browse files
committed
Fix a race where users could get cleaned up too early
1 parent 82a17a4 commit 9efccbd

File tree

1 file changed

+114
-26
lines changed

1 file changed

+114
-26
lines changed

src/realm/object-store/sync/impl/app_metadata.cpp

Lines changed: 114 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@
3838
using namespace realm;
3939
using realm::app::UserData;
4040

41+
template <>
42+
inline SyncUser::State Obj::get(ColKey ck) const
43+
{
44+
return static_cast<SyncUser::State>(get<int64_t>(ck));
45+
}
46+
4147
namespace {
4248

4349
struct CurrentUserSchema {
@@ -223,7 +229,7 @@ void migrate_to_v7(std::shared_ptr<Realm> old_realm, std::shared_ptr<Realm> real
223229
// removed we'll use the logged out state. If both are logged out or
224230
// both are removed then it doesn't matter which we pick.
225231
using State = SyncUser::State;
226-
auto state = State(obj.get<int64_t>(schema.state_col));
232+
auto state = obj.get<State>(schema.state_col);
227233
auto existing_state = State(existing.get<int64_t>(schema.state_col));
228234
if (state == existing_state) {
229235
if (state == State::LoggedIn) {
@@ -340,6 +346,8 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
340346
UserIdentitySchema m_user_identity_schema;
341347
CurrentUserSchema m_current_user_schema;
342348

349+
using UserState = SyncUser::State;
350+
343351
PersistedSyncMetadataManager(const app::AppConfig& app_config, SyncFileManager& file_manager)
344352
{
345353
// Note that there are several deferred schema changes which don't
@@ -378,10 +386,73 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
378386
m_current_user_schema.read(*realm);
379387

380388
m_coordinator = _impl::RealmCoordinator::get_existing_coordinator(config.path);
389+
390+
// When App::remove_user() is called, we mark the user as "removed" but
391+
// don't actually delete the UserMetadata object or the files on disk
392+
// immediately, and instead defer the actual removal until the next
393+
// launch of the application. This makes it so that we don't have to
394+
// require developers to ensure that all Realms associated with a user
395+
// are closed before removing the user, as that can be a difficult thing
396+
// to do.
397+
//
398+
// The "next launch" in a multiprocess scenario can be a somewhat
399+
// complicated concept. If one process calls remove_user(), exits, and
400+
// then is restarted, it still isn't safe to delete the user's files yet
401+
// if another process has also been running the whole time. Instead, we
402+
// need to wait for *all* of the processes which share a metadata Realm
403+
// to exit, and perform the cleanup actions only when one is launching
404+
// in a fresh state with no other processes running (but also we need to
405+
// work if multiple processes launch at once).
406+
//
407+
// The lock file management code already solves this problem - using a
408+
// mix of exclusive and shared locks on the lock file, opening a Realm
409+
// will reinitialize the lock file from scratch only if it is the
410+
// "session initiator" and no one already has the file open. We therefore
411+
// can detect the scenario where we want to perform launch actions by
412+
// using a flag in the lock file: we begin a frozen read transaction,
413+
// attempt to atomically set the flag, and if we were able we proceeed
414+
// to perform the launch actions present in that frozen version.
415+
//
416+
// In the simple scenario of an unshared metadata Realm which is only
417+
// ever accessed by a single process, this all behaves identically to
418+
// the naive solution of always processing all launch actions on launch.
419+
// If the metadata Realm was already open in another process, the flag
420+
// will already be set and we'll skip performing launch actions. If two
421+
// processes open the metadata Realm at once we get to the complicated
422+
// scenario: one of them will successfully set the flag and the other
423+
// will fail. The one which failed may then go on to perform writes on
424+
// the metadata Realm, possibly creating new launch actions. This is why
425+
// we need the frozen read transaction created *before* trying to set
426+
// the flag. The process which does successfully set the flag will only
427+
// process launch actions present in that frozen read, and thus only ones
428+
// which already existing before it set the flag. Any new actions created
429+
// by the second process won't be visible and will wait for the next
430+
// launch.
431+
//
432+
// User cleanup is made slightly more complicated by that users can be
433+
// logged back in after being removed. To handle this, we only delete
434+
// users (and their files) if the user is Removed in both the frozen
435+
// transaction and inside the write transaction. If a second process
436+
// logs the user back in between when we start the frozen read and when
437+
// we acquire the write lock we'll skip it, and if it tries to log in
438+
// after we acquire the write lock it'll be blocked by that and only
439+
// log in after we have completed cleanup.
440+
//
441+
// This scheme is only fully safe if synchronized Realms are only ever
442+
// opened using a non-Removed user, and not by using the fake sync history
443+
// mode where no user is provided. That mode is hopefully only ever used
444+
// by Realm Studio.
445+
//
446+
// To avoid a lockfile format change, the flag used happens to be the
447+
// sync agent flag, which is otherwise unused for the metadata Realm
448+
// (which is a local unsynchronized Realm). The use of this flag should
449+
// not be confused for there actually being a sync agent for the
450+
// metadata Realm.
451+
auto frozen = realm->freeze();
381452
if (m_coordinator->try_claim_sync_agent()) {
382453
realm->begin_transaction();
383-
perform_file_actions(*realm, file_manager);
384-
remove_dead_users(*realm, file_manager);
454+
perform_file_actions(frozen->read_group(), realm->read_group(), file_manager);
455+
remove_dead_users(frozen->read_group(), realm->read_group(), file_manager);
385456
realm->commit_transaction();
386457
}
387458
}
@@ -391,17 +462,39 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
391462
return m_coordinator->get_realm(util::Scheduler::make_dummy());
392463
}
393464

394-
void remove_dead_users(Realm& realm, SyncFileManager& file_manager)
465+
void for_each_obj(Group& frozen, Group& live, TableKey tk, util::FunctionRef<void(Obj&, Obj&)> fn)
395466
{
396-
auto& schema = m_user_schema;
397-
TableRef table = realm.read_group().get_table(schema.table_key);
398-
for (auto obj : *table) {
399-
if (static_cast<SyncUser::State>(obj.get<int64_t>(schema.state_col)) == SyncUser::State::Removed) {
400-
delete_user_realms(file_manager, obj);
467+
auto frozen_table = frozen.get_table(tk);
468+
if (frozen_table->is_empty())
469+
return;
470+
471+
// We want to iterate the objects present in both the before and after
472+
// realms. Any other objects are either no longer relevant or too new.
473+
TableRef table = live.get_table(tk);
474+
for (auto frozen_obj : *frozen_table) {
475+
if (auto obj = table->get_object(frozen_obj.get_key())) {
476+
fn(frozen_obj, obj);
401477
}
402478
}
403479
}
404480

481+
void remove_dead_users(Group& frozen, Group& live, SyncFileManager& file_manager)
482+
{
483+
auto& schema = m_user_schema;
484+
for_each_obj(frozen, live, schema.table_key, [&](Obj& frozen_obj, Obj& live_obj) {
485+
// The frozen object being removed but not the live object means that
486+
// another process logged the user back in. The live object being
487+
// removed but not the frozen one means that the removal happened
488+
// after we acquired the sync agent, and so we shouldn't process
489+
// the user yet.
490+
if (frozen_obj.get<UserState>(schema.state_col) != UserState::Removed)
491+
return;
492+
if (live_obj.get<UserState>(schema.state_col) != UserState::Removed)
493+
return;
494+
delete_user_realms(file_manager, live_obj);
495+
});
496+
}
497+
405498
void delete_user_realms(SyncFileManager& file_manager, Obj& obj)
406499
{
407500
Set<StringData> paths = obj.get_set<StringData>(m_user_schema.realm_file_paths_col);
@@ -455,16 +548,12 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
455548
return false;
456549
}
457550

458-
void perform_file_actions(Realm& realm, SyncFileManager& file_manager)
551+
void perform_file_actions(Group& frozen, Group& live, SyncFileManager& file_manager)
459552
{
460-
TableRef table = realm.read_group().get_table(m_file_action_schema.table_key);
461-
if (table->is_empty())
462-
return;
463-
464-
for (auto obj : *table) {
553+
for_each_obj(frozen, live, m_file_action_schema.table_key, [&](Obj&, Obj& obj) {
465554
if (perform_file_action(file_manager, obj))
466555
obj.remove();
467-
}
556+
});
468557
}
469558

470559
bool immediately_run_file_actions(SyncFileManager& file_manager, std::string_view realm_path) override
@@ -514,7 +603,7 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
514603
current_user.set<String>(m_current_user_schema.user_id, user_id);
515604
}
516605

517-
obj.set(schema.state_col, (int64_t)SyncUser::State::LoggedIn);
606+
obj.set(schema.state_col, (int64_t)UserState::LoggedIn);
518607
obj.set<String>(schema.refresh_token_col, refresh_token);
519608
obj.set<String>(schema.access_token_col, access_token);
520609
obj.set<String>(schema.device_id_col, device_id);
@@ -537,8 +626,7 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
537626
auto& data = *opt_data;
538627
update_fn(data);
539628

540-
obj.set(schema.state_col,
541-
int64_t(data.access_token ? SyncUser::State::LoggedIn : SyncUser::State::LoggedOut));
629+
obj.set(schema.state_col, int64_t(data.access_token ? UserState::LoggedIn : UserState::LoggedOut));
542630
obj.set<String>(schema.refresh_token_col, data.refresh_token.token);
543631
obj.set<String>(schema.access_token_col, data.access_token.token);
544632
obj.set<String>(schema.device_id_col, data.device_id);
@@ -587,13 +675,13 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
587675
if (!obj) {
588676
return {};
589677
}
590-
auto state = SyncUser::State(obj.get<Int>(m_user_schema.state_col));
591-
if (state == SyncUser::State::Removed) {
678+
auto state = obj.get<UserState>(m_user_schema.state_col);
679+
if (state == UserState::Removed) {
592680
return {};
593681
}
594682

595683
UserData data;
596-
if (state == SyncUser::State::LoggedIn) {
684+
if (state == UserState::LoggedIn) {
597685
try {
598686
data.access_token = RealmJWT(get_string(obj, m_user_schema.access_token_col));
599687
data.refresh_token = RealmJWT(get_string(obj, m_user_schema.refresh_token_col));
@@ -637,9 +725,9 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
637725
}
638726
}
639727

640-
void log_out(std::string_view user_id, SyncUser::State new_state) override
728+
void log_out(std::string_view user_id, UserState new_state) override
641729
{
642-
REALM_ASSERT(new_state != SyncUser::State::LoggedIn);
730+
REALM_ASSERT(new_state != UserState::LoggedIn);
643731
auto realm = get_realm();
644732
realm->begin_transaction();
645733
if (auto obj = find_user(*realm, user_id)) {
@@ -677,7 +765,7 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
677765
// This is overly cautious and merely checking the state should suffice,
678766
// but because this is a persisted file that can be modified it's possible
679767
// to get invalid combinations of data.
680-
return obj && obj.get<int64_t>(m_user_schema.state_col) == int64_t(SyncUser::State::LoggedIn) &&
768+
return obj && obj.get<UserState>(m_user_schema.state_col) == UserState::LoggedIn &&
681769
RealmJWT::validate(get_string(obj, m_user_schema.access_token_col)) &&
682770
RealmJWT::validate(get_string(obj, m_user_schema.refresh_token_col));
683771
}
@@ -689,7 +777,7 @@ struct PersistedSyncMetadataManager : public app::MetadataStore {
689777
std::vector<std::string> users;
690778
users.reserve(table->size());
691779
for (auto& obj : *table) {
692-
if (obj.get<int64_t>(m_user_schema.state_col) != int64_t(SyncUser::State::Removed)) {
780+
if (obj.get<UserState>(m_user_schema.state_col) != UserState::Removed) {
693781
users.emplace_back(obj.get<String>(m_user_schema.user_id_col));
694782
}
695783
}

0 commit comments

Comments
 (0)