Skip to content

Commit 9bed8e4

Browse files
committed
MB-42657: Write crash log messages to separate file
It is safe for the signal handler to use open/write/close, so to enhance the current signal safe crash handler: 1) Open a new file in the logging directory 2) Duplicate the stderr messages to the new file 3) On start-up check for the file and clone the contents to the memcached log file. The result is that the memcached log now includes (after a restart) a clear indication that a crash occurred. Change-Id: I733f300befd5e91dcec5f2ca899540016bd72fb7 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/156115 Tested-by: Build Bot <[email protected]> Reviewed-by: Dave Rigby <[email protected]>
1 parent b55a3c3 commit 9bed8e4

File tree

4 files changed

+116
-56
lines changed

4 files changed

+116
-56
lines changed

daemon/memcached.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ static void recalculate_max_connections() {
391391
}
392392

393393
static void breakpad_changed_listener(const std::string&, Settings &s) {
394-
cb::breakpad::initialize(s.getBreakpadSettings());
394+
cb::breakpad::initialize(s.getBreakpadSettings(), s.getLoggerConfig());
395395
}
396396

397397
static void verbosity_changed_listener(const std::string&, Settings &s) {
@@ -919,7 +919,8 @@ int memcached_main(int argc, char** argv) {
919919
/// Initialize breakpad crash catcher with our just-parsed settings
920920
Settings::instance().addChangeListener("breakpad",
921921
breakpad_changed_listener);
922-
cb::breakpad::initialize(Settings::instance().getBreakpadSettings());
922+
cb::breakpad::initialize(Settings::instance().getBreakpadSettings(),
923+
Settings::instance().getLoggerConfig());
923924

924925
LOG_INFO("Using SLA configuration: {}", cb::mcbp::sla::to_json().dump());
925926

tests/breakpad_test.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,8 @@ def set_core_file_ulimit():
261261
"stdin_listener": False,
262262
"root" : os.path.abspath(args.source_root),
263263
"verbosity" : 2,
264-
"rbac_file" : os.path.abspath(rbac_file.name)}
264+
"rbac_file" : os.path.abspath(rbac_file.name),
265+
"logger" : { "filename" : minidump_dir + "/log"}}
265266
config_json = json.dumps(config)
266267

267268
# Need a temporary file which can be opened (a second time) by memcached,
@@ -333,14 +334,16 @@ def set_core_file_ulimit():
333334

334335
if args.breakpad:
335336
# Check there is a minidump path in the output.
336-
m = re.search('Writing crash dump to ([\w\\\/\:\-.]+)', stderrdata)
337-
if not m:
337+
# MB-42657 means there can be multiple "Writing crash" messages, the last
338+
# one is the relevant message for this test.
339+
matches = re.findall('Writing crash dump to ([\w\\\/\:\-.]+)', stderrdata)
340+
if not matches:
338341
logging.error("FAIL - Unable to find crash filename in stderr.")
339342
print_stderrdata(stderrdata)
340343
cleanup_and_exit(4)
341344

345+
minidump = matches[-1]
342346
# Check the minidump file exists on disk.
343-
minidump = m.group(1)
344347
if not os.path.exists(minidump):
345348
logging.error(
346349
"FAIL - Minidump file '{0}' does not exist.".format(minidump))

utilities/breakpad.cc

Lines changed: 100 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@
1313
#include "terminate_handler.h"
1414

1515
#include <logger/logger.h>
16+
#include <logger/logger_config.h>
1617

1718
#include <platform/backtrace.h>
1819
#include <platform/dirutils.h>
1920

21+
#include <sstream>
22+
#include <string>
23+
2024
#if defined(WIN32) && defined(HAVE_BREAKPAD)
2125

2226
/*
@@ -69,6 +73,9 @@ using namespace google_breakpad;
6973
// breakpad handler
7074
static std::unique_ptr<ExceptionHandler> handler;
7175

76+
// A path to use for writing data about a crash.
77+
static std::string crashLogPath;
78+
7279
#if defined(WIN32) && defined(HAVE_BREAKPAD)
7380
// These methods are called from breakpad when creating
7481
// the dump. They're inside the #ifdef block to avoid
@@ -93,27 +100,29 @@ static void dump_stack() {
93100
// no timestamp (cannot generate timestamps from a signal handler)
94101
// The write macros are swallowing the return value, if it's not success there's
95102
// not much we can do.
96-
#define WRITE_MSG(MSG) \
97-
do { \
98-
auto rv = write(STDERR_FILENO, MSG, sizeof(MSG) - 1); \
99-
(void)rv; \
103+
#define WRITE_MSG(FD, MSG) \
104+
do { \
105+
auto rv = write(FD, MSG, sizeof(MSG) - 1); \
106+
(void)rv; \
100107
} while (0)
101108

102-
#define WRITE_CSTR(CSTR) \
103-
do { \
104-
auto rv = write(STDERR_FILENO, CSTR, strlen(CSTR)); \
105-
(void)rv; \
109+
#define WRITE_CSTR(FD, CSTR) \
110+
do { \
111+
auto rv = write(FD, CSTR, strlen(CSTR)); \
112+
(void)rv; \
106113
} while (0)
107114

108115
static void write_to_logger(void* ctx, const char* frame) {
109-
WRITE_MSG(" ");
110-
WRITE_CSTR(frame);
111-
WRITE_MSG("\n");
116+
int fd = *reinterpret_cast<int*>(ctx);
117+
WRITE_MSG(fd, " ");
118+
WRITE_CSTR(fd, frame);
119+
WRITE_MSG(fd, "\n");
112120
}
113121

114-
static void dump_stack() {
115-
WRITE_MSG("Stack backtrace of crashed thread:\n");
116-
print_backtrace(write_to_logger, nullptr);
122+
/// @param fd The file descriptor to write to
123+
static void dump_stack(int fd) {
124+
WRITE_MSG(fd, "Stack backtrace of crashed thread:\n");
125+
print_backtrace(write_to_logger, &fd);
117126
}
118127
#endif
119128

@@ -144,19 +153,50 @@ static bool dumpCallback(const wchar_t* dump_path,
144153
}
145154
#elif defined(linux) && defined(HAVE_BREAKPAD)
146155

147-
#define MSG1p1 "CRITICAL Breakpad caught a crash (Couchbase version "
156+
#define MSG_CRITICAL "CRITICAL "
157+
#define MSG1p1 "Breakpad caught a crash (Couchbase version "
148158
#define MSG1p2 "). Writing crash dump to "
149159
#define CAUGHT_CRASH_MSG MSG1p1 PRODUCT_VERSION MSG1p2
160+
#define CRITICAL_CAUGHT_CRASH_MSG MSG_CRITICAL CAUGHT_CRASH_MSG
161+
#define FAILED_OPEN MSG_CRITICAL "dumpCallback failed to open crashLogPath:"
150162

151163
static bool dumpCallback(const MinidumpDescriptor& descriptor,
152164
void* context,
153165
bool succeeded) {
154-
WRITE_MSG(CAUGHT_CRASH_MSG);
155-
WRITE_CSTR(descriptor.path());
156-
WRITE_MSG(" before terminating.\n");
157-
dump_stack();
166+
auto writeFn = [&descriptor](int fd) {
167+
if (fd == STDERR_FILENO) {
168+
WRITE_MSG(fd, CRITICAL_CAUGHT_CRASH_MSG);
169+
} else {
170+
WRITE_MSG(fd, CAUGHT_CRASH_MSG);
171+
}
172+
WRITE_CSTR(fd, descriptor.path());
173+
WRITE_MSG(fd, " before terminating.\n");
174+
dump_stack(fd);
175+
};
176+
177+
writeFn(STDERR_FILENO);
178+
179+
int logFd =
180+
open(crashLogPath.c_str(), O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR);
181+
if (logFd < 0) {
182+
WRITE_MSG(STDERR_FILENO, FAILED_OPEN);
183+
WRITE_CSTR(STDERR_FILENO, crashLogPath.c_str());
184+
WRITE_MSG(STDERR_FILENO, "\n");
185+
} else {
186+
writeFn(logFd);
187+
auto rv = close(logFd);
188+
(void)rv; // If !success is ignored
189+
}
190+
158191
return succeeded;
159192
}
193+
194+
#undef MSG_CRITICAL
195+
#undef MSG1p1
196+
#undef MSG1p2
197+
#undef CAUGHT_CRASH_MSG
198+
#undef CRITICAL_CAUGHT_CRASH_MSG
199+
#undef FAILED_OPEN
160200
#endif
161201

162202
void create_handler(const std::string& minidump_dir) {
@@ -186,15 +226,52 @@ void create_handler(const std::string& minidump_dir) {
186226
/*install_handler*/ true,
187227
/*server_fd*/ -1));
188228
#else
189-
// Not supported on this plaform
229+
// Not supported on this platform
190230
#endif
191231
}
192232

193-
void cb::breakpad::initialize(const cb::breakpad::Settings& settings) {
233+
void logCrashData() {
234+
if (cb::io::isFile(crashLogPath)) {
235+
#if defined(WIN32)
236+
// I'm not reading this.
237+
LOG_CRITICAL_RAW("Unexpected crash file found on WIN32");
238+
#elif defined(linux)
239+
LOG_CRITICAL_RAW("Detected previous crash");
240+
241+
// OK to get a truncated input, 8K is plenty to capture most dumps and
242+
// get the CRITICAL messages into the log.
243+
const size_t fileReadLimit = 8192;
244+
std::string data = cb::io::loadFile(crashLogPath, {}, fileReadLimit);
245+
auto ss = std::stringstream{data};
246+
const int lineLimit = 70;
247+
int lineCount = 0;
248+
for (std::string line; std::getline(ss, line) && lineCount < lineLimit;
249+
lineCount++) {
250+
if (!line.empty()) {
251+
LOG_CRITICAL("{}", line);
252+
}
253+
}
254+
if (lineCount == lineLimit) {
255+
LOG_INFO("logCrashData reached line count limit {}", lineCount);
256+
}
257+
#endif
258+
if (remove(crashLogPath.c_str()) != 0) {
259+
LOG_CRITICAL("Failed to remove crashLogPath:{} errno:{}",
260+
crashLogPath,
261+
errno);
262+
}
263+
}
264+
}
265+
266+
void cb::breakpad::initialize(const cb::breakpad::Settings& settings,
267+
const cb::logger::Config& logConfig) {
194268
// We cannot actually change any of breakpad's settings once created, only
195269
// remove it and re-create with new settings.
196270
destroy();
197271

272+
bool checkForCrash = crashLogPath.empty();
273+
crashLogPath = logConfig.filename + ".breakpad.crash.txt";
274+
198275
if (settings.enabled) {
199276
create_handler(settings.minidump_dir);
200277
}
@@ -212,27 +289,9 @@ void cb::breakpad::initialize(const cb::breakpad::Settings& settings) {
212289
set_terminate_handler_print_backtrace(true);
213290
LOG_INFO_RAW("Breakpad disabled");
214291
}
215-
}
216-
217-
void cb::breakpad::initialize(const std::string& directory) {
218-
// We cannot actually change any of breakpad's settings once created, only
219-
// remove it and re-create with new settings.
220-
destroy();
221-
222-
if (directory.empty()) {
223-
// No directory provided
224-
LOG_INFO_RAW("Breakpad disabled");
225-
return;
226-
}
227-
228-
create_handler(directory);
229292

230-
if (handler) {
231-
// Turn off the terminate handler's backtrace - otherwise we
232-
// just print it twice.
233-
set_terminate_handler_print_backtrace(false);
234-
} else {
235-
// do we want to notify the user that we don't have access?
293+
if (checkForCrash) {
294+
logCrashData();
236295
}
237296
}
238297

utilities/breakpad.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,19 @@
1212

1313
#include "breakpad_settings.h"
1414

15+
namespace cb::logger {
16+
struct Config;
17+
}
18+
1519
namespace cb::breakpad {
1620
/**
1721
* Initialize breakpad based on the specified settings struct.
1822
*
1923
* The function may be called multiple times and allow for reconfiguration
2024
* of the breakpad settings.
2125
*/
22-
void initialize(const cb::breakpad::Settings& settings);
23-
24-
/**
25-
* Initialize breakpad from use of a command line program.
26-
*
27-
* @param directory specifies the dump directory to use (an empty directory
28-
* is accepted and ignored).
29-
*/
30-
void initialize(const std::string& dumpdir);
26+
void initialize(const cb::breakpad::Settings& settings,
27+
const cb::logger::Config& logConfig);
3128

3229
/**
3330
* Cleaning up when breakpad no longer needed

0 commit comments

Comments
 (0)