Skip to content

Commit bf1e307

Browse files
charford and claude committed
Fix SIGUSR1/SIGUSR2 handler clobbering breaking JVM processes
signal() overwrote SIGUSR1 and SIGUSR2 without saving previous handlers, causing JVM crashes (SIGSEGV in Monitor::wait) since the JVM uses these signals internally for GC safepoints and thread management. Replace signal() with sigaction() to save the old handlers, and chain to them in the stubs so other runtimes (JVM, etc.) can still process the signals correctly. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Signed-off-by: Casey Harford <casey@caseyharford.com>
1 parent 54b0ca3 commit bf1e307

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

src/multiprocess/multiprocess_memory_limit.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <unistd.h>
1212
#include <time.h>
1313
#include <signal.h>
14+
#include <string.h>
1415

1516
#include <assert.h>
1617
#include <cuda.h>
@@ -73,12 +74,21 @@ void set_current_gpu_status(int status){
7374
}
7475
}
7576

77+
// Dispositions that were registered for SIGUSR1 / SIGUSR2 before this file
// installed its own stubs. They are filled in through the "old action"
// argument of the sigaction() calls that register sig_restore_stub and
// sig_swap_stub, and read by those stubs to forward the signal to the
// previous handler.
static struct sigaction old_sigusr1_sa;
78+
static struct sigaction old_sigusr2_sa;
79+
7680
void sig_restore_stub(int signo){
7781
set_current_gpu_status(1);
82+
// Chain to previous handler so JVM (and other runtimes) can process SIGUSR1
83+
if (old_sigusr1_sa.sa_handler != SIG_DFL && old_sigusr1_sa.sa_handler != SIG_IGN)
84+
old_sigusr1_sa.sa_handler(signo);
7885
}
7986

8087
void sig_swap_stub(int signo){
8188
set_current_gpu_status(2);
89+
// Chain to previous handler so JVM (and other runtimes) can process SIGUSR2
90+
if (old_sigusr2_sa.sa_handler != SIG_DFL && old_sigusr2_sa.sa_handler != SIG_IGN)
91+
old_sigusr2_sa.sa_handler(signo);
8292
}
8393

8494

@@ -689,8 +699,16 @@ void init_proc_slot_withlock() {
689699
if (proc_num >= SHARED_REGION_MAX_PROCESS_NUM) {
690700
exit_withlock(-1);
691701
}
692-
signal(SIGUSR2,sig_swap_stub);
693-
signal(SIGUSR1,sig_restore_stub);
702+
struct sigaction sa;
703+
memset(&sa, 0, sizeof(sa));
704+
sigemptyset(&sa.sa_mask);
705+
sa.sa_flags = SA_RESTART;
706+
707+
sa.sa_handler = sig_swap_stub;
708+
sigaction(SIGUSR2, &sa, &old_sigusr2_sa);
709+
710+
sa.sa_handler = sig_restore_stub;
711+
sigaction(SIGUSR1, &sa, &old_sigusr1_sa);
694712

695713
// If, by any means a pid of itself is found in region->process, then it is probably caused by crashloop
696714
// we need to reset it.

0 commit comments

Comments
 (0)