Skip to content

Commit 468d5a2

Browse files
authored
chore: passert failure should flush logs (#5682)
Resolves CORE-325. One of the concerns reported in [CORE-290](https://linear.app/tiledb/issue/CORE-290/consolidation-stdbad-alloc) was that the process aborting did not flush logs and consequently did not leave a lot of clues about what happened. We cannot always intercept events which lead to process shutdown, but one place we have managed to do so is assertion failures in core-owned code. #5514 added `iassert` and `passert` routines, which throws an exception or halts the process in the event of a failure respectively; #5532 added support to run arbitrary callbacks in the event of a `passert` failure. This pull request leverages that capability to flush all logs in the event of a `passert` failure so that if a customer sees a `passert` failure we will know that the logs are up to date. --- TYPE: NO_HISTORY DESC: Flush logs if there is a `passert` failure
1 parent 93a2621 commit 468d5a2

File tree

8 files changed

+237
-29
lines changed

8 files changed

+237
-29
lines changed

scripts/linter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
heap_memory_ignored_files = frozenset(
1818
[
1919
"allocate_unique.h",
20+
'assert.cc',
21+
'assert.h',
2022
"dynamic_memory.h",
2123
"unit_dynamic_memory.cc",
2224
"heap_profiler.h",

test/ci/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ target_include_directories(test_assert
99
Catch2::Catch2
1010
)
1111

12-
target_link_libraries(test_assert PUBLIC tiledb_Catch2WithMain)
12+
target_link_libraries(test_assert PUBLIC tiledb_Catch2WithMain assert)
1313

1414
target_compile_definitions(
1515
test_assert
@@ -23,7 +23,7 @@ add_executable(
2323
try_assert.cc
2424
)
2525

26-
target_link_libraries(try_assert PUBLIC assert)
26+
target_link_libraries(try_assert PUBLIC assert baseline)
2727

2828
target_include_directories(try_assert
2929
PRIVATE

test/ci/test_assert.cc

Lines changed: 99 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,19 @@
3636

3737
#include <algorithm>
3838
#include <cassert>
39+
#include <filesystem>
40+
#include <fstream>
3941
#include <iostream>
4042
#include <list>
4143

44+
#include "tiledb/common/assert.h"
45+
46+
#ifdef TILEDB_ASSERTIONS
47+
static constexpr bool builtWithAssertions = true;
48+
#else
49+
static constexpr bool builtWithAssertions = false;
50+
#endif
51+
4252
#ifdef _WIN32
4353
std::vector<int> assert_exit_codes{3};
4454
#else
@@ -50,24 +60,56 @@ std::vector<int> assert_exit_codes{
5060
#endif
5161

5262
TEST_CASE("CI: Test assertions configuration", "[ci][assertions]") {
53-
int retval = system(TILEDB_PATH_TO_TRY_ASSERT);
63+
char command[2048];
64+
std::optional<std::filesystem::path> try_assert_logfile;
65+
if (builtWithAssertions) {
66+
try_assert_logfile =
67+
std::filesystem::temp_directory_path() / "try_assert.log";
68+
sprintf(
69+
command,
70+
"%s %s",
71+
TILEDB_PATH_TO_TRY_ASSERT,
72+
try_assert_logfile.value().string().c_str());
73+
} else {
74+
sprintf(command, "%s", TILEDB_PATH_TO_TRY_ASSERT);
75+
}
76+
77+
const int retval = system(command);
5478

5579
// in case value is one not currently accepted, report what was returned.
5680
std::cout << "retval is " << retval << " (0x" << std::hex << retval
5781
<< ") from " << TILEDB_PATH_TO_TRY_ASSERT << std::endl;
5882

59-
#ifdef TILEDB_ASSERTIONS
60-
REQUIRE(
61-
std::find(assert_exit_codes.begin(), assert_exit_codes.end(), retval) !=
62-
assert_exit_codes.end());
63-
#else
64-
(void)assert_exit_codes;
65-
REQUIRE(retval == 0);
66-
#endif
83+
if (builtWithAssertions) {
84+
REQUIRE(
85+
std::find(assert_exit_codes.begin(), assert_exit_codes.end(), retval) !=
86+
assert_exit_codes.end());
87+
88+
// the log should have flushed due to passert callback
89+
std::vector<std::string> logcontents;
90+
{
91+
std::ifstream logstream(try_assert_logfile.value());
92+
REQUIRE(logstream.is_open());
93+
94+
std::string logline;
95+
while (std::getline(logstream, logline)) {
96+
logcontents.push_back(logline);
97+
}
98+
}
99+
std::filesystem::remove(try_assert_logfile.value());
100+
101+
REQUIRE(logcontents.size() == 1);
102+
if (!logcontents.empty()) {
103+
REQUIRE(logcontents[0].find("begin passert(false)") != std::string::npos);
104+
}
105+
} else {
106+
(void)assert_exit_codes;
107+
REQUIRE(retval == 0);
108+
}
67109
}
68110

69111
TEST_CASE("CI: Test libc assertions configuration", "[ci][assertions]") {
70-
int retval = system(TILEDB_PATH_TO_TRY_LIBC_ASSERT);
112+
const int retval = system(TILEDB_PATH_TO_TRY_LIBC_ASSERT);
71113

72114
// report return value
73115
std::cout << "retval is " << retval << " (0x" << std::hex << retval
@@ -102,3 +144,50 @@ TEST_CASE("CI: Test libc assertions configuration", "[ci][assertions]") {
102144
REQUIRE(retval == 0);
103145
}
104146
}
147+
148+
TEST_CASE("CI: passert callback registration non-FIFO", "[assertions]") {
149+
int x = 0;
150+
int y = 100;
151+
int z = 10000;
152+
std::vector<int> order_;
153+
154+
using R = tiledb::common::PAssertFailureCallbackRegistration;
155+
156+
std::unique_ptr<R> inc_x(new R([&]() { order_.push_back(x++); }));
157+
std::unique_ptr<R> inc_y(new R([&]() { order_.push_back(y++); }));
158+
std::unique_ptr<R> inc_z(new R([&]() { order_.push_back(z++); }));
159+
160+
REQUIRE(order_.empty());
161+
162+
tiledb::common::passert_failure_run_callbacks();
163+
CHECK(order_ == std::vector<int>{10000, 100, 0});
164+
order_.clear();
165+
166+
tiledb::common::passert_failure_run_callbacks();
167+
CHECK(order_ == std::vector<int>{10001, 101, 1});
168+
order_.clear();
169+
170+
SECTION("Remove x") {
171+
inc_x.reset();
172+
173+
tiledb::common::passert_failure_run_callbacks();
174+
CHECK(order_ == std::vector<int>{10002, 102});
175+
order_.clear();
176+
}
177+
178+
SECTION("Remove y") {
179+
inc_y.reset();
180+
181+
tiledb::common::passert_failure_run_callbacks();
182+
CHECK(order_ == std::vector<int>{10002, 2});
183+
order_.clear();
184+
}
185+
186+
SECTION("Remove z") {
187+
inc_z.reset();
188+
189+
tiledb::common::passert_failure_run_callbacks();
190+
CHECK(order_ == std::vector<int>{102, 2});
191+
order_.clear();
192+
}
193+
}

test/ci/try_assert.cc

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,28 @@
11
#include <cassert>
2+
#include <filesystem>
23
#include <iostream>
34

5+
#include <spdlog/sinks/basic_file_sink.h>
6+
#include <spdlog/spdlog.h>
7+
48
#include "tiledb/common/assert.h"
9+
#include "tiledb/common/logger.h"
510

611
#if defined(_MSC_VER)
712
#include <crtdbg.h>
813
#endif
914

10-
int main(int, char**) {
15+
int main(int argc, char** argv) {
16+
std::optional<tiledb::common::Logger> log;
17+
if (argc > 1) {
18+
const auto logname =
19+
std::filesystem::path(std::string(argv[0])).filename().string();
20+
const auto logfile = std::filesystem::path(std::string(argv[1]));
21+
22+
auto spdlogger = spdlog::basic_logger_mt(logname, logfile.string());
23+
log.emplace(spdlogger);
24+
}
25+
1126
#if defined(_MSC_VER)
1227
// We disable the following events on abort:
1328
// _WRITE_ABORT_MSG: Display message box with Abort, Retry, Ignore
@@ -19,8 +34,16 @@ int main(int, char**) {
1934
_CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
2035
_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
2136
#endif
37+
if (log.has_value()) {
38+
log.value().error("begin passert(false)");
39+
}
40+
2241
passert(false);
2342

43+
if (log.has_value()) {
44+
log.value().error("end passert(false)");
45+
}
46+
2447
std::cout << "Assert did not exit!" << std::endl;
2548
return 0;
2649
}

tiledb/common/assert.cc

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,45 @@ AssertFailure::AssertFailure(const std::string& what)
4545
throw AssertFailure(file, line, expr);
4646
}
4747

48-
static std::mutex passertFailureCallbackMutex;
49-
static std::vector<std::function<void()>> passertFailureCallbacks;
48+
struct PAssertFailureCallbackProcessState {
49+
using self_type = PAssertFailureCallbackProcessState;
5050

51-
[[noreturn]] void passert_failure_abort(void) {
52-
{
53-
for (auto& callback : passertFailureCallbacks) {
54-
std::unique_lock<std::mutex> lk(passertFailureCallbackMutex);
55-
callback();
51+
static std::shared_ptr<self_type> get() {
52+
static std::shared_ptr<self_type> singleton;
53+
if (singleton == nullptr) {
54+
singleton.reset(new self_type());
5655
}
56+
return singleton;
57+
}
58+
59+
std::mutex mutex_;
60+
std::list<std::function<void()>> callbacks_;
61+
62+
private:
63+
PAssertFailureCallbackProcessState() {
64+
}
65+
};
66+
67+
void passert_failure_run_callbacks(void) {
68+
auto state = PAssertFailureCallbackProcessState::get();
69+
if (!state) {
70+
// NB: we expect this to be unreachable.
71+
// `!state` should only happen if the singleton has been
72+
// destructed, which should only occur when the shared library
73+
// is unloaded or when the process exits. In either case we
74+
// should not see a `passert` failure at that point - or there
75+
// are worse problems.
76+
return;
5777
}
5878

79+
std::unique_lock<std::mutex> lk(state->mutex_);
80+
for (auto& callback : state->callbacks_) {
81+
callback();
82+
}
83+
}
84+
85+
[[noreturn]] void passert_failure_abort(void) {
86+
passert_failure_run_callbacks();
5987
std::abort();
6088
}
6189

@@ -71,15 +99,19 @@ static std::vector<std::function<void()>> passertFailureCallbacks;
7199
#ifdef TILEDB_ASSERTIONS
72100

73101
PAssertFailureCallbackRegistration::PAssertFailureCallbackRegistration(
74-
std::function<void()>&& callback) {
75-
std::unique_lock<std::mutex> lk(passertFailureCallbackMutex);
76-
passertFailureCallbacks.push_back(
102+
std::function<void()>&& callback)
103+
: process_state_(PAssertFailureCallbackProcessState::get()) {
104+
std::unique_lock<std::mutex> lk(process_state_->mutex_);
105+
106+
process_state_->callbacks_.push_front(
77107
std::forward<std::function<void()>>(callback));
108+
109+
callback_node_ = process_state_->callbacks_.begin();
78110
}
79111

80112
PAssertFailureCallbackRegistration::~PAssertFailureCallbackRegistration() {
81-
std::unique_lock<std::mutex> lk(passertFailureCallbackMutex);
82-
passertFailureCallbacks.pop_back();
113+
std::unique_lock<std::mutex> lk(process_state_->mutex_);
114+
process_state_->callbacks_.erase(callback_node_);
83115
}
84116

85117
#endif

tiledb/common/assert.h

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@
9090
#include <cstdlib>
9191
#include <functional>
9292
#include <iostream>
93+
#include <list>
94+
#include <memory>
95+
96+
#include "tiledb/common/macros.h"
9397

9498
#define __SOURCE__ (&__FILE__[__SOURCE_DIR_PATH_SIZE__])
9599

@@ -169,6 +173,18 @@ template <typename... Args>
169173
throw AssertFailure(file, line, expr, fmt, std::forward<Args>(fmt_args)...);
170174
}
171175

176+
/**
177+
* Structure holding the process-level state required to run `passert`
178+
* failure callbacks.
179+
*/
180+
struct PAssertFailureCallbackProcessState;
181+
182+
/**
183+
* Runs registered callbacks prior to aborting the process in the event of a
184+
* `passert` failure. See `PAssertFailureCallbackRegistration`.
185+
*/
186+
void passert_failure_run_callbacks(void);
187+
172188
/**
173189
* Aborts the process upon `passert` failure.
174190
*/
@@ -217,16 +233,34 @@ template <typename... Args>
217233
* Registers a callback to run upon `passert` failure.
218234
* This can be used to print any diagnostic info prior to aborting the process.
219235
*/
220-
struct PAssertFailureCallbackRegistration {
221236
#ifdef TILEDB_ASSERTIONS
237+
238+
struct PAssertFailureCallbackRegistration {
222239
PAssertFailureCallbackRegistration(std::function<void()>&& callback);
223240
~PAssertFailureCallbackRegistration();
241+
DISABLE_COPY_AND_COPY_ASSIGN(PAssertFailureCallbackRegistration);
242+
243+
private:
244+
/**
245+
* Lazy initialization of global process state.
246+
* This avoids the "static initialization order fiasco".
247+
*/
248+
std::shared_ptr<PAssertFailureCallbackProcessState> process_state_;
249+
250+
/** Reference to the registered callback for deregistering */
251+
std::list<std::function<void()>>::const_iterator callback_node_;
252+
};
253+
224254
#else
255+
256+
struct PAssertFailureCallbackRegistration {
225257
PAssertFailureCallbackRegistration(std::function<void()>&&) {
226258
}
227-
#endif
259+
DISABLE_COPY_AND_COPY_ASSIGN(PAssertFailureCallbackRegistration);
228260
};
229261

262+
#endif
263+
230264
} // namespace tiledb::common
231265

232266
#ifdef TILEDB_ASSERTIONS

tiledb/common/logger.cc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ Logger::Logger(
7373
const Logger::Format format,
7474
const bool root)
7575
: name_(name)
76-
, root_(root) {
76+
, root_(root)
77+
, passert_callback_flush_([this]() { flush(); }) {
7778
logger_ = spdlog::get(name_);
7879
if (logger_ == nullptr) {
7980
#ifdef _WIN32
@@ -92,7 +93,8 @@ Logger::Logger(
9293
set_format(format);
9394
}
9495

95-
Logger::Logger(shared_ptr<spdlog::logger> logger) {
96+
Logger::Logger(shared_ptr<spdlog::logger> logger)
97+
: passert_callback_flush_([this]() { flush(); }) {
9698
logger_ = std::move(logger);
9799
}
98100

@@ -312,6 +314,12 @@ std::string Logger::add_tag(const std::string& tag, uint64_t id) {
312314
return tags;
313315
}
314316

317+
void Logger::flush() const {
318+
if (logger_) {
319+
logger_->flush();
320+
}
321+
}
322+
315323
/* ********************************* */
316324
/* GLOBAL */
317325
/* ********************************* */

0 commit comments

Comments
 (0)