From 18e55c476411d08f129f4e4ff9c86f8cd16e65a6 Mon Sep 17 00:00:00 2001 From: Artyom Ivanov Date: Thu, 17 Jul 2025 15:51:19 +0300 Subject: [PATCH 1/4] fix(shutdown): Prevent race condition when ~GlobalObjectHolder() routine unlocks global mutex Unlocking global mutex in GlobalObject destruction routine made it possible for a new attachment to slip in, so it will be creating new GlobalObject and using it, while destroying routine still in action. This can lead to an undefined state of the global objects, such as shared memory, where one thread is actively using it while another thread is destroying it. --- src/jrd/Database.cpp | 14 ++++++++++++++ src/jrd/Database.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/src/jrd/Database.cpp b/src/jrd/Database.cpp index f2a5dfe51cc..4ed026a8839 100644 --- a/src/jrd/Database.cpp +++ b/src/jrd/Database.cpp @@ -586,6 +586,15 @@ namespace Jrd { MutexLockGuard guard(g_mutex, FB_FUNCTION); + while (g_shuttingDown) + { + // Currently other GlobalObject is in shutdown process but he unlock the mutex for some reason, + // so it's better to wait until this process is over. + // This is done to eliminate potential race conditions involving global objects, such as shared memory. + MutexUnlockGuard unlockGuard(g_mutex, FB_FUNCTION); + Thread::yield(); + } + Database::GlobalObjectHolder::DbId* entry = g_hashTable->lookup(id); if (!entry) { @@ -606,6 +615,11 @@ namespace Jrd fb_assert(false); { // scope + // We need to unlock mutex to safely shutdown some managers, so set shutdown flag to make sure that + // other threads will wait until we done our shutdown routine. + // This is done to eliminate potential race conditions involving global objects, such as shared memory. + AutoSetRestore shuttingDownGuard(&g_shuttingDown, true); + // here we cleanup what should not be globally protected MutexUnlockGuard guard(g_mutex, FB_FUNCTION); if (m_replMgr) diff --git a/src/jrd/Database.h b/src/jrd/Database.h index a2739a86308..8a0b3d5a876 100644 --- a/src/jrd/Database.h +++ b/src/jrd/Database.h @@ -290,6 +290,8 @@ class Database : public pool_alloc static Firebird::GlobalPtr g_hashTable; static Firebird::GlobalPtr g_mutex; + // It doesn't need to be atomic because all accesses is done under the mutex + static inline bool g_shuttingDown = false; public: static GlobalObjectHolder* init(const Firebird::string& id, From 57024acfdc910889bfc04c44877f035b4af318b7 Mon Sep 17 00:00:00 2001 From: Artyom Ivanov Date: Fri, 18 Jul 2025 14:17:01 +0300 Subject: [PATCH 2/4] fix(shutdown): Reimplement synchronization between shutdown and init routine for GlobalObjectHolder --- src/common/classes/RefCounted.h | 5 +++ src/jrd/Database.cpp | 56 ++++++++++++++++++++++----------- src/jrd/Database.h | 4 ++- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/common/classes/RefCounted.h b/src/common/classes/RefCounted.h index 2a2ce5bacaa..8397fc723f1 100644 --- a/src/common/classes/RefCounted.h +++ b/src/common/classes/RefCounted.h @@ -48,6 +48,11 @@ namespace Firebird return refCnt; } + AtomicCounter::counter_type getRefCount() const noexcept + { + return m_refCnt.value(); + } + void assertNonZero() { fb_assert(m_refCnt.value() > 0); diff --git a/src/jrd/Database.cpp b/src/jrd/Database.cpp index 4ed026a8839..ce5a3e2b05b 100644 --- a/src/jrd/Database.cpp +++ b/src/jrd/Database.cpp @@ -586,21 +586,39 @@ namespace Jrd { MutexLockGuard guard(g_mutex, FB_FUNCTION); - while (g_shuttingDown) + // Get entry with incrementing ref counter, so if someone is currently destroying it, the object itself + // will remain alive. + RefPtr entry(g_hashTable->lookup(id)); + if (entry) { - // Currently other GlobalObject is in shutdown process but he unlock the mutex for some reason, - // so it's better to wait until this process is over. - // This is done to eliminate potential race conditions involving global objects, such as shared memory. - MutexUnlockGuard unlockGuard(g_mutex, FB_FUNCTION); - Thread::yield(); - } + auto& shutdownMutex = entry->shutdownMutex; + // Check if someone else currently destroying GlobalObject. + if (shutdownMutex.tryEnter(FB_FUNCTION)) + { + // No one is destroying GlobalObject, continue init routine. + shutdownMutex.leave(); + } + else + { + // Someone is currently destroying GlobalObject, wait until he finish it to eliminate potential + // race conditions. + { + MutexUnlockGuard unlockGuard(g_mutex, FB_FUNCTION); - Database::GlobalObjectHolder::DbId* entry = g_hashTable->lookup(id); + MutexLockGuard guard(shutdownMutex, FB_FUNCTION); + } + // Now we are the one who owned DbId object. + // It also was removed from hash table, so simply delete it and recreate it next. + fb_assert(entry->getRefCount() == 1); + entry = nullptr; + } + } if (!entry) { const auto holder = FB_NEW Database::GlobalObjectHolder(id, filename, config); - entry = FB_NEW Database::GlobalObjectHolder::DbId(id, holder); + entry = makeRef(FB_NEW Database::GlobalObjectHolder::DbId(id, holder)); g_hashTable->add(entry); + entry->addRef(); } entry->holder->addRef(); @@ -610,16 +628,17 @@ namespace Jrd Database::GlobalObjectHolder::~GlobalObjectHolder() { // dtor is executed under g_mutex protection - Database::GlobalObjectHolder::DbId* entry = g_hashTable->lookup(m_id); - if (!g_hashTable->remove(m_id)) - fb_assert(false); - { // scope - // We need to unlock mutex to safely shutdown some managers, so set shutdown flag to make sure that - // other threads will wait until we done our shutdown routine. - // This is done to eliminate potential race conditions involving global objects, such as shared memory. - AutoSetRestore shuttingDownGuard(&g_shuttingDown, true); + // Stole the object from the hash table without incrementing ref counter, so we will be the one who will delete the object + // at the end of this function. + RefPtr entry(REF_NO_INCR, g_hashTable->lookup(m_id)); + fb_assert(entry); + // We need to unlock the global mutex to safely shutdown some managers, so lock shutdown mutex to make sure that + // other threads will wait until we done our shutdown routine. + // This is done to eliminate potential race conditions involving global objects, such as shared memory. + MutexLockGuard guard(entry->shutdownMutex, FB_FUNCTION); + { // scope // here we cleanup what should not be globally protected MutexUnlockGuard guard(g_mutex, FB_FUNCTION); if (m_replMgr) @@ -630,7 +649,8 @@ namespace Jrd m_eventMgr = nullptr; m_replMgr = nullptr; - delete entry; + if (!g_hashTable->remove(m_id)) + fb_assert(false); fb_assert(m_tempCacheUsage == 0); } diff --git a/src/jrd/Database.h b/src/jrd/Database.h index 8a0b3d5a876..a0ef8035dc6 100644 --- a/src/jrd/Database.h +++ b/src/jrd/Database.h @@ -256,7 +256,7 @@ class Database : public pool_alloc typedef Firebird::HashTable DbIdHash; - struct DbId : public DbIdHash::Entry, public Firebird::GlobalStorage + struct DbId : public DbIdHash::Entry, public Firebird::GlobalStorage, public Firebird::RefCounted { DbId(const Firebird::string& x, GlobalObjectHolder* h) : id(getPool(), x), holder(h) @@ -286,6 +286,8 @@ class Database : public pool_alloc const Firebird::string id; GlobalObjectHolder* const holder; + + Firebird::Mutex shutdownMutex; }; static Firebird::GlobalPtr g_hashTable; From 62990429f58a0b007279c135bfb64bfee3a0bcf8 Mon Sep 17 00:00:00 2001 From: Artyom Ivanov Date: Fri, 18 Jul 2025 14:32:53 +0300 Subject: [PATCH 3/4] refactor(shutdown): Remove unused variable --- src/jrd/Database.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/jrd/Database.h b/src/jrd/Database.h index a0ef8035dc6..ede5262a33e 100644 --- a/src/jrd/Database.h +++ b/src/jrd/Database.h @@ -292,8 +292,6 @@ class Database : public pool_alloc static Firebird::GlobalPtr g_hashTable; static Firebird::GlobalPtr g_mutex; - // It doesn't need to be atomic because all accesses is done under the mutex - static inline bool g_shuttingDown = false; public: static GlobalObjectHolder* init(const Firebird::string& id, From 44fd279d95ac0a54cc8cd36fe4ceb98d1332362c Mon Sep 17 00:00:00 2001 From: Artyom Ivanov Date: Mon, 21 Jul 2025 14:56:30 +0300 Subject: [PATCH 4/4] refactor(shutdown): Code adjustments after Vlad review --- src/common/classes/RefCounted.h | 5 ----- src/jrd/Database.cpp | 7 ++++++- src/jrd/Database.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/common/classes/RefCounted.h b/src/common/classes/RefCounted.h index 8397fc723f1..2a2ce5bacaa 100644 --- a/src/common/classes/RefCounted.h +++ b/src/common/classes/RefCounted.h @@ -48,11 +48,6 @@ namespace Firebird return refCnt; } - AtomicCounter::counter_type getRefCount() const noexcept - { - return m_refCnt.value(); - } - void assertNonZero() { fb_assert(m_refCnt.value() > 0); diff --git a/src/jrd/Database.cpp b/src/jrd/Database.cpp index ce5a3e2b05b..803bb458a66 100644 --- a/src/jrd/Database.cpp +++ b/src/jrd/Database.cpp @@ -609,7 +609,7 @@ namespace Jrd } // Now we are the one who owned DbId object. // It also was removed from hash table, so simply delete it and recreate it next. - fb_assert(entry->getRefCount() == 1); + fb_assert(entry->holder == nullptr); entry = nullptr; } } @@ -633,9 +633,12 @@ namespace Jrd // at the end of this function. RefPtr entry(REF_NO_INCR, g_hashTable->lookup(m_id)); fb_assert(entry); + fb_assert(entry->holder == this); // We need to unlock the global mutex to safely shutdown some managers, so lock shutdown mutex to make sure that // other threads will wait until we done our shutdown routine. // This is done to eliminate potential race conditions involving global objects, such as shared memory. + //! Be careful with order of mutex locking: first - `g_mutex`, second - `shutdownMutex`, but after `MutexUnlockGuard` + //! this order will be reversed, so potentially a deadlock situation may occur here. MutexLockGuard guard(entry->shutdownMutex, FB_FUNCTION); { // scope @@ -652,6 +655,8 @@ namespace Jrd if (!g_hashTable->remove(m_id)) fb_assert(false); + entry->holder = nullptr; + fb_assert(m_tempCacheUsage == 0); } diff --git a/src/jrd/Database.h b/src/jrd/Database.h index ede5262a33e..47512bda677 100644 --- a/src/jrd/Database.h +++ b/src/jrd/Database.h @@ -285,8 +285,8 @@ class Database : public pool_alloc } const Firebird::string id; - GlobalObjectHolder* const holder; - + GlobalObjectHolder* holder; + // This mutex is working very tight with `g_mutex`, so use it carefully to avoid possible deadlocks. Firebird::Mutex shutdownMutex; };