Skip to content

Commit df83811

Browse files
authored
Show debugger backtraces when C++ tests crash in Github CI (#2609)
1 parent c0ab3cd commit df83811

File tree

4 files changed

+50
-33
lines changed

4 files changed

+50
-33
lines changed

.github/workflows/main.yml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ jobs:
4040
ARCH_AND_OS: x86_64-unknown-linux-musl
4141
SCCACHE_AZURE_CONNECTION_STRING: ${{ secrets.AZURE_CI_STORAGE_CONNECTION_STRING }}
4242

43-
- name: Setup library dependencies
43+
- name: Setup dependencies
4444
run: >
4545
sudo apt update &&
46-
sudo apt install -y libcurl4-openssl-dev
46+
sudo apt install -y gdb libcurl4-openssl-dev
4747
4848
- name: Setup misc
4949
run: |
@@ -55,17 +55,28 @@ jobs:
5555
run: bundle exec drake -j4 test:cxx:build
5656

5757
- name: Run C++ tests
58+
# The AbortHandler doesn't work so well on Github's Linux runners:
59+
# - Triggering the original signal handler makes the runner freeze, so we force
60+
# terminate the process.
61+
# - force killing the process may not work, so we enforce a low timeout to prevent
62+
# the test from haging for a long time. We enforce this primarily using the 'timeout'
63+
# command from coreutils instead of Github Actions' timeout, because the latter
64+
# could put the entire job in the "skipped" state and refuse to show logs.
65+
timeout-minutes: 11
5866
run: >
5967
sudo env
6068
PATH="$PATH"
6169
GEM_PATH="$GEM_PATH"
70+
PASSENGER_DUMP_CRASH_WATCH_TO_STDERR=true
71+
PASSENGER_FORCE_TERMINATE_ON_ABORT=true
72+
/usr/bin/timeout --signal KILL --verbose 600
6273
../buildout/test/cxx/main
6374
working-directory: test
6475

6576
- name: Archive logs
6677
uses: actions/upload-artifact@v4
6778
with:
68-
name: cxx-test-logs-macos
79+
name: cxx-test-logs-linux
6980
path: 'buildout/testlogs/*'
7081
if-no-files-found: ignore
7182
if: '!cancelled()'
@@ -116,18 +127,17 @@ jobs:
116127

117128
- name: Run C++ tests
118129
# The AbortHandler doesn't work so well on Github's macOS runners:
119-
# - crash-watch invocation freezes the runner, so we disable this.
120130
# - resuming the parent process after SIGSTOP doesn't work, so we force kill the parent.
121131
# - force killing the parent may not work, so we enforce a low timeout to prevent
122132
# the test from haging for a long time. We enforce this primarily using the 'timeout'
123-
# command from coreutils instead of Github Actions' timeout, because the latter
133+
# command from coreutils instead of Github Actions' timeout, because the latter
124134
# could put the entire job in the "skipped" state and refuse to show logs.
125135
timeout-minutes: 11
126136
run: >
127137
sudo env
128138
PATH="$PATH"
129139
GEM_PATH="$GEM_PATH"
130-
PASSENGER_DUMP_WITH_CRASH_WATCH=false
140+
PASSENGER_DUMP_CRASH_WATCH_TO_STDERR=true
131141
PASSENGER_FORCE_TERMINATE_ON_ABORT=true
132142
gtimeout --signal KILL --verbose 600
133143
../buildout/test/cxx/main

src/agent/Shared/Fundamentals/AbortHandler.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -550,10 +550,10 @@ dumpFileDescriptorInfo(AbortHandlerWorkingState &state) {
550550
}
551551

552552
static void
553-
dumpWithCrashWatch(AbortHandlerWorkingState &state) {
553+
dumpWithCrashWatch(AbortHandlerWorkingState &state, bool toStderr) {
554554
int fd = -1;
555555

556-
if (state.crashLogDirFd != -1) {
556+
if (state.crashLogDirFd != -1 && !toStderr) {
557557
fd = openat(state.crashLogDirFd, "backtrace.log", O_WRONLY | O_CREAT | O_TRUNC, 0600);
558558
if (fd != -1) {
559559
printCrashLogFileCreated(state, "backtrace.log");
@@ -594,7 +594,25 @@ dumpWithCrashWatch(AbortHandlerWorkingState &state) {
594594
write_nowarn(STDERR_FILENO, state.messageBuf, pos - state.messageBuf);
595595

596596
} else {
597-
waitpid(child, NULL, 0);
597+
int status = -1;
598+
int ret = waitpid(child, &status, 0);
599+
int e = errno;
600+
if (ret == -1 || status != 0) {
601+
pos = state.messageBuf;
602+
pos = ASSU::appendData(pos, end, "ERROR running 'crash-watch' (");
603+
if (ret == -1) {
604+
pos = ASSU::appendData(pos, end, "waitpid() failed, errno=");
605+
pos = ASSU::appendInteger<int, 10>(pos, end, e);
606+
} else if (WIFSIGNALED(status)) {
607+
pos = ASSU::appendData(pos, end, "exited with signal ");
608+
pos = ASSU::appendInteger<int, 10>(pos, end, WTERMSIG(status));
609+
} else {
610+
pos = ASSU::appendData(pos, end, "exit status ");
611+
pos = ASSU::appendInteger<int, 10>(pos, end, WEXITSTATUS(status));
612+
}
613+
pos = ASSU::appendData(pos, end, ")\n");
614+
write_nowarn(STDERR_FILENO, state.messageBuf, pos - state.messageBuf);
615+
}
598616
}
599617

600618
if (fd != -1) {
@@ -852,7 +870,7 @@ dumpDiagnostics(AbortHandlerWorkingState &state) {
852870
pos = ASSU::appendData(pos, end, " ] Dumping a backtrace with crash-watch...\n");
853871
#endif
854872
write_nowarn(STDERR_FILENO, state.messageBuf, pos - state.messageBuf);
855-
dumpWithCrashWatch(state);
873+
dumpWithCrashWatch(state, ctx->config->dumpCrashWatchToStderr);
856874
} else {
857875
write_nowarn(STDERR_FILENO, "\n", 1);
858876
}
@@ -969,7 +987,7 @@ forkAndRedirectToTeeAndMainLogFile(const char *crashLogDir) {
969987
execlp("/usr/bin/cat", "cat", (char *) 0);
970988
ASSU::printError("ERROR: cannot execute 'tee' or 'cat'; crash log will be lost!\n");
971989
_exit(1);
972-
return false;
990+
return false; // Unreachable
973991
} else if (pid == -1) {
974992
ASSU::printError("ERROR: cannot fork a process for executing 'tee'\n");
975993
return false;

src/agent/Shared/Fundamentals/AbortHandler.h

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626
#ifndef _PASSENGER_AGENT_FUNDAMENTALS_ABORT_HANDLER_H_
2727
#define _PASSENGER_AGENT_FUNDAMENTALS_ABORT_HANDLER_H_
2828

29-
#include <cstddef>
30-
3129
namespace Passenger {
3230
class ResourceLocator;
3331
}
@@ -38,7 +36,7 @@ namespace Fundamentals {
3836

3937

4038
struct AbortHandlerConfig {
41-
static const unsigned int MAX_DIAGNOSTICS_DUMPERS = 5;
39+
static constexpr unsigned int MAX_DIAGNOSTICS_DUMPERS = 5;
4240
typedef void (*DiagnosticsDumperFunc)(void *userData);
4341

4442
struct DiagnosticsDumper {
@@ -56,26 +54,16 @@ struct AbortHandlerConfig {
5654
};
5755

5856

59-
char *ruby;
60-
char **origArgv;
61-
unsigned int randomSeed;
62-
bool dumpWithCrashWatch;
63-
bool beep;
64-
bool stopProcess;
65-
bool forceTerminateProcess;
66-
ResourceLocator *resourceLocator;
57+
char *ruby = nullptr;
58+
char **origArgv = nullptr;
59+
unsigned int randomSeed = 0;
60+
bool dumpWithCrashWatch = false;
61+
bool dumpCrashWatchToStderr = false;
62+
bool beep = false;
63+
bool stopProcess = false;
64+
bool forceTerminateProcess = false;
65+
ResourceLocator *resourceLocator = nullptr;
6766
DiagnosticsDumper diagnosticsDumpers[MAX_DIAGNOSTICS_DUMPERS];
68-
69-
AbortHandlerConfig()
70-
: ruby(NULL),
71-
origArgv(NULL),
72-
randomSeed(0),
73-
dumpWithCrashWatch(false),
74-
beep(false),
75-
stopProcess(false),
76-
forceTerminateProcess(false),
77-
resourceLocator(NULL)
78-
{ }
7967
};
8068

8169
void installAbortHandler(const AbortHandlerConfig *config);

src/agent/Shared/Fundamentals/Initialization.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ maybeInitializeAbortHandler() {
465465
config->origArgv = context->origArgv;
466466
config->randomSeed = context->randomSeed;
467467
config->dumpWithCrashWatch = getEnvBool("PASSENGER_DUMP_WITH_CRASH_WATCH", true);
468+
config->dumpCrashWatchToStderr = getEnvBool("PASSENGER_DUMP_CRASH_WATCH_TO_STDERR", false);
468469
config->beep = getEnvBool("PASSENGER_BEEP_ON_ABORT");
469470
config->stopProcess = getEnvBool("PASSENGER_STOP_ON_ABORT");
470471
config->forceTerminateProcess = getEnvBool("PASSENGER_FORCE_TERMINATE_ON_ABORT");

0 commit comments

Comments
 (0)