Skip to content

Commit e6dfe4f

Browse files
davisprroelke
andauthored
Fix segfault race condition at process exit (#5477)
I discovered a couple race conditions leading to segfaults in a separate repository. After debugging I managed to narrow this down to issues around the lifetimes of our static global state instances. As these instances are all destructed as the main thread exits, anything in separate threads can end up attempting to reuse these instances after they have been destructed. The test included in this PR doesn't actually use threads due to flakiness concerns. Instead we can observe the same phenomenon by attempting to store a `static std::optional<Context>` instance which ends up having its destructor run after the destructors for GlobalState and the root Logger. This change updates the GlobalState to be a `static std::shared_ptr<GlobalState>` and the `Logger` to be a `static Logger*`. In the case of `GlobalState` we then just rely on shared_ptr reference counting to ensure that the instance is alive for as long as a `Context` needs it. The `Logger*` on the other hand is just never delete'd so that any log messages generated are included rather than silencing any thread after the main thread exits. --- TYPE: BUG DESC: Fix segfault race condition at process exit --------- Co-authored-by: Ryan Roelke <[email protected]>
1 parent 101bfa6 commit e6dfe4f

File tree

12 files changed

+148
-15
lines changed

12 files changed

+148
-15
lines changed

tiledb/common/logger.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,12 @@ std::string global_logger_name(const Logger::Format format) {
329329
}
330330

331331
Logger& global_logger(Logger::Format format) {
332-
static Logger l(global_logger_name(format), Logger::Level::ERR, format, true);
333-
return l;
332+
// Note: We are *not* deallocating this root `Logger` instance so that
333+
// during process exit, threads other than main will not trigger segfault's
334+
// if they attempt to log after the main thread has exited.
335+
static Logger* l = tiledb_new<Logger>(
336+
HERE(), global_logger_name(format), Logger::Level::ERR, format, true);
337+
return *l;
334338
}
335339

336340
/** Logs a trace. */

tiledb/sm/cpp_api/config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
#include <map>
4040
#include <memory>
41+
#include <optional>
4142
#include <string>
4243
#include <unordered_map>
4344

tiledb/sm/global_state/global_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ using namespace tiledb::common;
4747

4848
namespace tiledb::sm::global_state {
4949

50-
GlobalState& GlobalState::GetGlobalState() {
50+
shared_ptr<GlobalState> GlobalState::GetGlobalState() {
5151
// This is thread-safe in C++11.
52-
static GlobalState globalState;
52+
static shared_ptr<GlobalState> globalState = make_shared<GlobalState>(HERE());
5353
return globalState;
5454
}
5555

tiledb/sm/global_state/global_state.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <set>
3838
#include <string>
3939

40+
#include "tiledb/common/common.h"
4041
#include "tiledb/sm/config/config.h"
4142
#include "tiledb/sm/storage_manager/storage_manager_declaration.h"
4243

@@ -59,8 +60,11 @@ class GlobalState {
5960
GlobalState& operator=(const GlobalState&) = delete;
6061
GlobalState& operator=(const GlobalState&&) = delete;
6162

63+
/** Constructor. */
64+
GlobalState();
65+
6266
/** Returns a reference to the singleton GlobalState instance. */
63-
static GlobalState& GetGlobalState();
67+
static shared_ptr<GlobalState> GetGlobalState();
6468

6569
/**
6670
* Initializes all TileDB global state in an idempotent and threadsafe way.
@@ -101,9 +105,6 @@ class GlobalState {
101105

102106
/** Mutex protecting list of StorageManagers. */
103107
std::mutex storage_managers_mtx_;
104-
105-
/** Constructor. */
106-
GlobalState();
107108
};
108109

109110
} // namespace global_state

tiledb/sm/global_state/watchdog.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,15 @@ void Watchdog::watchdog_thread(Watchdog* watchdog) {
7373
return;
7474
}
7575

76+
auto global_state = GlobalState::GetGlobalState();
77+
7678
while (true) {
7779
std::unique_lock<std::mutex> lck(watchdog->mtx_);
7880
watchdog->cv_.wait_for(
7981
lck, std::chrono::milliseconds(constants::watchdog_thread_sleep_ms));
8082

8183
if (SignalHandlers::signal_received()) {
82-
for (auto* sm : GlobalState::GetGlobalState().storage_managers()) {
84+
for (auto* sm : global_state->storage_managers()) {
8385
throw_if_not_ok(sm->cancel_all_tasks());
8486
}
8587
}

tiledb/sm/global_state/watchdog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#include <mutex>
3838
#include <thread>
3939

40+
#include "tiledb/sm/global_state/global_state.h"
41+
4042
namespace tiledb {
4143
namespace sm {
4244
namespace global_state {

tiledb/sm/storage_manager/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,5 @@ commence(object_library context_resources)
3333
this_target_sources(context_resources.cc)
3434
this_target_object_libraries(baseline config rest_client stats thread_pool vfs)
3535
conclude(object_library)
36+
37+
add_test_subdirectory()

tiledb/sm/storage_manager/storage_manager.cc

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,17 @@ StorageManagerCanonical::StorageManagerCanonical(
6060
ContextResources& resources,
6161
const shared_ptr<Logger>&, // unused
6262
const Config& config)
63-
: vfs_(resources.vfs())
63+
: global_state_(global_state::GlobalState::GetGlobalState())
64+
, vfs_(resources.vfs())
6465
, cancellation_in_progress_(false)
6566
, config_(config)
6667
, queries_in_progress_(0) {
67-
auto& global_state = global_state::GlobalState::GetGlobalState();
68-
global_state.init(config_);
69-
70-
global_state.register_storage_manager(this);
68+
global_state_->init(config_);
69+
global_state_->register_storage_manager(this);
7170
}
7271

7372
StorageManagerCanonical::~StorageManagerCanonical() {
74-
global_state::GlobalState::GetGlobalState().unregister_storage_manager(this);
73+
global_state_->unregister_storage_manager(this);
7574

7675
throw_if_not_ok(cancel_all_tasks());
7776

tiledb/sm/storage_manager/storage_manager_canonical.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "tiledb/sm/array/array_directory.h"
5353
#include "tiledb/sm/enums/walk_order.h"
5454
#include "tiledb/sm/filesystem/uri.h"
55+
#include "tiledb/sm/global_state/global_state.h"
5556
#include "tiledb/sm/group/group.h"
5657
#include "tiledb/sm/misc/cancelable_tasks.h"
5758
#include "tiledb/sm/misc/types.h"
@@ -146,6 +147,9 @@ class StorageManagerCanonical {
146147
/* PRIVATE ATTRIBUTES */
147148
/* ********************************* */
148149

150+
/** The GlobalState to use for this StorageManager. */
151+
shared_ptr<global_state::GlobalState> global_state_;
152+
149153
/**
150154
* VFS instance used in `cancel_all_tasks`.
151155
*/
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#
2+
# tiledb/sm/storage_manager/CMakeLists.txt
3+
#
4+
# The MIT License
5+
#
6+
# Copyright (c) 2025 TileDB, Inc.
7+
#
8+
# Permission is hereby granted, free of charge, to any person obtaining a copy
9+
# of this software and associated documentation files (the "Software"), to deal
10+
# in the Software without restriction, including without limitation the rights
11+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
# copies of the Software, and to permit persons to whom the Software is
13+
# furnished to do so, subject to the following conditions:
14+
#
15+
# The above copyright notice and this permission notice shall be included in
16+
# all copies or substantial portions of the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24+
# THE SOFTWARE.
25+
#
26+
27+
include(unit_test)
28+
29+
commence(unit_test static_context)
30+
this_target_sources(main.cc unit_static_context.cc)
31+
target_include_directories(unit_static_context PRIVATE
32+
"$<TARGET_PROPERTY:TILEDB_CORE_OBJECTS,INCLUDE_DIRECTORIES>")
33+
this_target_link_libraries(TILEDB_CORE_OBJECTS)
34+
this_target_link_libraries(tiledb_test_support_lib)
35+
conclude(unit_test)

0 commit comments

Comments
 (0)