Skip to content

Commit 110840f

Browse files
authored
ess/hnp: add support for forwarding additional signals (#2712)
* ess/hnp: add support for forwarding additional signals. This commit adds support to the hnp ess module to forward additional signals beyond the default SIGUSR1, SIGUSR2, SIGTSTP, and SIGCONT. Signed-off-by: Nathan Hjelm <[email protected]> * Generalize this a bit to allow a broader range of signals to be forwarded. Turns out that SIGURG is now a "standard" signal, though the value differs across systems. So set up to forward it (and some friends) if they are defined. Allow users to provide the signal name (instead of the integer value) as the value of even the more common signals does vary across systems. Don't limit the number that can be supported. Signed-off-by: Ralph Castain <[email protected]> * ess/hnp: fix some bugs in the signal forwarding code. This commit fixes two bugs: - signals_set needs to be set even if no signals are being forwarded. If it is not set we will SEGV in libevent if ess_hnp_forward_signals == none. - SIGTERM and SIGHUP are handled with a different type of handler. Do not allow the user to specify these to be forwarded. Signed-off-by: Nathan Hjelm <[email protected]> * We are sure to get "dinged" if error messages aren't nicely output via show_help, so do so here. Signed-off-by: Ralph Castain <[email protected]>
1 parent 91c34c8 commit 110840f

File tree

5 files changed

+267
-42
lines changed

5 files changed

+267
-42
lines changed

orte/mca/ess/hnp/Makefile.am

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,18 @@
1010
# Copyright (c) 2004-2005 The Regents of the University of California.
1111
# All rights reserved.
1212
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
13+
# Copyright (c) 2017 Los Alamos National Security, LLC. All rights
14+
# reserved.
15+
# Copyright (c) 2017 Intel, Inc. All rights reserved.
1316
# $COPYRIGHT$
1417
#
1518
# Additional copyrights may follow
1619
#
1720
# $HEADER$
1821
#
1922

23+
dist_ortedata_DATA = help-ess-hnp.txt
24+
2025
sources = \
2126
ess_hnp.h \
2227
ess_hnp_component.c \

orte/mca/ess/hnp/ess_hnp.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
12
/*
23
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
34
* University Research and Technology
@@ -9,6 +10,9 @@
910
* University of Stuttgart. All rights reserved.
1011
* Copyright (c) 2004-2005 The Regents of the University of California.
1112
* All rights reserved.
13+
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
14+
* reserved.
15+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1216
* $COPYRIGHT$
1317
*
1418
* Additional copyrights may follow
@@ -24,12 +28,19 @@ BEGIN_C_DECLS
2428
/*
2529
* Module open / close
2630
*/
27-
int orte_ess_hnp_component_open(void);
28-
int orte_ess_hnp_component_close(void);
29-
int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority);
31+
typedef struct {
32+
opal_list_item_t super;
33+
char *signame;
34+
int signal;
35+
} ess_hnp_signal_t;
36+
OBJ_CLASS_DECLARATION(ess_hnp_signal_t);
3037

38+
typedef struct {
39+
orte_ess_base_component_t base;
40+
opal_list_t signals;
41+
} orte_ess_hnp_component_t;
3142

32-
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component;
43+
ORTE_MODULE_DECLSPEC extern orte_ess_hnp_component_t mca_ess_hnp_component;
3344

3445
END_C_DECLS
3546

orte/mca/ess/hnp/ess_hnp_component.c

Lines changed: 193 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
13+
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
1414
* reserved.
15+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -29,45 +30,200 @@
2930
#include "orte/constants.h"
3031

3132
#include "orte/util/proc_info.h"
33+
#include "orte/util/show_help.h"
3234

3335
#include "orte/mca/ess/ess.h"
3436
#include "orte/mca/ess/hnp/ess_hnp.h"
37+
#include "orte/runtime/orte_globals.h"
3538

3639
extern orte_ess_base_module_t orte_ess_hnp_module;
40+
static int hnp_component_register (void);
41+
static int hnp_component_open(void);
42+
static int hnp_component_close(void);
43+
static int hnp_component_query(mca_base_module_t **module, int *priority);
44+
45+
/**
 * Describes one signal that a user may name in the list of additional
 * signals to forward.
 */
struct known_signal {
    /** signal number */
    int signal;
    /** signal name (points at a string literal, never freed) */
    const char *signame;
    /** can this signal be forwarded */
    bool can_forward;
};

/*
 * Signals that are recognised by name.  The table is read-only and is
 * terminated by an entry whose signame is NULL.
 *
 * Entries marked "false" are rejected when a user asks for them to be
 * forwarded.  Signals that are not guaranteed to exist on every
 * platform are only listed when the corresponding macro is defined.
 */
static const struct known_signal known_signals[] = {
    {SIGTERM, "SIGTERM", false},
    {SIGHUP, "SIGHUP", false},
    {SIGINT, "SIGINT", false},
    {SIGKILL, "SIGKILL", false},
#ifdef SIGSYS
    {SIGSYS, "SIGSYS", true},
#endif
#ifdef SIGXCPU
    {SIGXCPU, "SIGXCPU", true},
#endif
#ifdef SIGXFSZ
    {SIGXFSZ, "SIGXFSZ", true},
#endif
#ifdef SIGVTALRM
    {SIGVTALRM, "SIGVTALRM", true},
#endif
#ifdef SIGPROF
    {SIGPROF, "SIGPROF", true},
#endif
#ifdef SIGINFO
    {SIGINFO, "SIGINFO", true},
#endif
#ifdef SIGPWR
    {SIGPWR, "SIGPWR", true},
#endif
    /* end-of-table marker */
    {0, NULL, false},
};
3780

3881
/*
3982
* Instantiate the public struct with all of our public information
4083
* and pointers to our public functions in it
4184
*/
42-
orte_ess_base_component_t mca_ess_hnp_component = {
43-
.base_version = {
44-
ORTE_ESS_BASE_VERSION_3_0_0,
45-
46-
/* Component name and version */
47-
.mca_component_name = "hnp",
48-
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
49-
ORTE_RELEASE_VERSION),
50-
51-
/* Component open and close functions */
52-
.mca_open_component = orte_ess_hnp_component_open,
53-
.mca_close_component = orte_ess_hnp_component_close,
54-
.mca_query_component = orte_ess_hnp_component_query,
55-
},
56-
.base_data = {
57-
/* The component is checkpoint ready */
58-
MCA_BASE_METADATA_PARAM_CHECKPOINT
59-
},
85+
orte_ess_hnp_component_t mca_ess_hnp_component = {
86+
.base = {
87+
.base_version = {
88+
ORTE_ESS_BASE_VERSION_3_0_0,
89+
90+
/* Component name and version */
91+
.mca_component_name = "hnp",
92+
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
93+
ORTE_RELEASE_VERSION),
94+
95+
/* Component open and close functions */
96+
.mca_open_component = hnp_component_open,
97+
.mca_close_component = hnp_component_close,
98+
.mca_query_component = hnp_component_query,
99+
.mca_register_component_params = hnp_component_register,
100+
},
101+
.base_data = {
102+
/* The component is checkpoint ready */
103+
MCA_BASE_METADATA_PARAM_CHECKPOINT
104+
}
105+
}
60106
};
61107

108+
/* Storage for the "forward_signals" MCA variable: the user-supplied,
 * comma-delimited list of extra signals to forward.  Reset to NULL in
 * hnp_component_register() and parsed in hnp_component_open(). */
static char *additional_signals;
62109

63-
int
64-
orte_ess_hnp_component_open(void)
110+
static int hnp_component_register (void)
65111
{
112+
additional_signals = NULL;
113+
(void) mca_base_component_var_register (&mca_ess_hnp_component.base.base_version,
114+
"forward_signals", "Comma-delimited list "
115+
"of additional signals (names or integers) to forward to "
116+
"application processes [\"none\" => forward nothing]", MCA_BASE_VAR_TYPE_STRING,
117+
NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY,
118+
&additional_signals);
119+
120+
return ORTE_SUCCESS;
121+
}
122+
123+
/*
 * Create a new ess_hnp_signal_t for signal number (x) with a private
 * copy of the name (s), and append it to the component's list of
 * signals to forward.
 */
#define ESS_ADDSIGNAL(x, s)                                                  \
    do {                                                                     \
        ess_hnp_signal_t *entry_ = OBJ_NEW(ess_hnp_signal_t);                \
        entry_->signal = (x);                                                \
        entry_->signame = strdup((s));                                       \
        opal_list_append(&mca_ess_hnp_component.signals, &entry_->super);    \
    } while(0)
131+
132+
static int hnp_component_open(void)
133+
{
134+
int i, sval;
135+
char **signals, *tmp;
136+
ess_hnp_signal_t *sig;
137+
bool ignore, found;
138+
139+
OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t);
140+
141+
/* we know that some signals are (nearly) always defined, regardless
142+
* of environment, so add them here */
143+
ESS_ADDSIGNAL(SIGTSTP, "SIGTSTP");
144+
ESS_ADDSIGNAL(SIGUSR1, "SIGUSR1");
145+
ESS_ADDSIGNAL(SIGUSR2, "SIGUSR2");
146+
ESS_ADDSIGNAL(SIGABRT, "SIGABRT");
147+
ESS_ADDSIGNAL(SIGALRM, "SIGALRM");
148+
ESS_ADDSIGNAL(SIGCONT, "SIGCONT");
149+
#ifdef SIGURG
150+
ESS_ADDSIGNAL(SIGURG, "SIGURG");
151+
#endif
152+
153+
/* see if they asked for anything beyond those - note that they may
154+
* have asked for some we already cover, and so we ignore any duplicates */
155+
if (NULL != additional_signals) {
156+
/* if they told us "none", then dump the list */
157+
if (0 == strcmp(additional_signals, "none")) {
158+
OPAL_LIST_DESTRUCT(&mca_ess_hnp_component.signals);
159+
/* need to reconstruct it for when we close */
160+
OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t);
161+
return ORTE_SUCCESS;
162+
}
163+
signals = opal_argv_split(additional_signals, ',');
164+
for (i=0; NULL != signals[i]; i++) {
165+
sval = 0;
166+
if (0 != strncmp(signals[i], "SIG", 3)) {
167+
/* treat it like a number */
168+
errno = 0;
169+
sval = strtoul(signals[i], &tmp, 10);
170+
if (0 != errno || '\0' != *tmp) {
171+
orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal",
172+
true, signals[i], additional_signals);
173+
opal_argv_free(signals);
174+
return OPAL_ERR_SILENT;
175+
}
176+
}
177+
178+
/* see if it is one we already covered */
179+
ignore = false;
180+
OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) {
181+
if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) {
182+
/* got it - we will ignore */
183+
ignore = true;
184+
break;
185+
}
186+
}
187+
188+
if (ignore) {
189+
continue;
190+
}
191+
192+
/* see if they gave us a signal name */
193+
found = false;
194+
for (int j = 0 ; known_signals[j].signame ; ++j) {
195+
if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) {
196+
if (!known_signals[j].can_forward) {
197+
orte_show_help("help-ess-hnp.txt", "ess-hnp:cannot-forward",
198+
true, known_signals[j].signame, additional_signals);
199+
opal_argv_free(signals);
200+
return OPAL_ERR_SILENT;
201+
}
202+
found = true;
203+
ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame);
204+
break;
205+
}
206+
}
207+
208+
if (!found) {
209+
if (0 == strncmp(signals[i], "SIG", 3)) {
210+
orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal",
211+
true, signals[i], additional_signals);
212+
opal_argv_free(signals);
213+
return OPAL_ERR_SILENT;
214+
}
215+
216+
ESS_ADDSIGNAL(sval, signals[i]);
217+
}
218+
}
219+
opal_argv_free (signals);
220+
}
221+
66222
return ORTE_SUCCESS;
67223
}
68224

69225

70-
int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority)
226+
static int hnp_component_query(mca_base_module_t **module, int *priority)
71227
{
72228

73229
/* we are the hnp module - we need to be selected
@@ -86,9 +242,22 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority)
86242
}
87243

88244

89-
int
90-
orte_ess_hnp_component_close(void)
245+
static int hnp_component_close(void)
91246
{
92247
return ORTE_SUCCESS;
93248
}
94249

250+
/* instantiate the class */
251+
/* Constructor for ess_hnp_signal_t: a new entry starts without a name. */
static void scon(ess_hnp_signal_t *entry)
{
    entry->signame = NULL;
}
255+
/* Destructor for ess_hnp_signal_t: release the entry's name string. */
static void sdes(ess_hnp_signal_t *entry)
{
    /* free(NULL) is a no-op, so an unnamed entry needs no special case */
    free(entry->signame);
}
261+
/* ess_hnp_signal_t derives from opal_list_item_t and uses scon/sdes as
 * its constructor/destructor */
OBJ_CLASS_INSTANCE(ess_hnp_signal_t, opal_list_item_t, scon, sdes);

0 commit comments

Comments
 (0)