@@ -88,12 +88,11 @@ static bool signals_set=false;
88
88
static opal_event_t term_handler ;
89
89
static opal_event_t int_handler ;
90
90
static opal_event_t epipe_handler ;
91
- static opal_event_t sigusr1_handler ;
92
- static opal_event_t sigusr2_handler ;
93
91
static char * log_path = NULL ;
94
92
static void shutdown_signal (int fd , short flags , void * arg );
95
- static void signal_callback (int fd , short flags , void * arg );
96
93
static void epipe_signal_callback (int fd , short flags , void * arg );
94
+ static void signal_forward_callback (int fd , short event , void * arg );
95
+ static opal_event_t * forward_signals_events = NULL ;
97
96
98
97
static void setup_sighandler (int signal , opal_event_t * ev ,
99
98
opal_event_cbfunc_t cbfunc )
@@ -119,6 +118,8 @@ int orte_ess_base_orted_setup(void)
119
118
unsigned i , j ;
120
119
orte_topology_t * t ;
121
120
opal_list_t transports ;
121
+ orte_ess_base_signal_t * sig ;
122
+ int idx ;
122
123
123
124
/* my name is set, xfer it to the OPAL layer */
124
125
orte_process_info .super .proc_name = * (opal_process_name_t * )ORTE_PROC_MY_NAME ;
@@ -128,18 +129,31 @@ int orte_ess_base_orted_setup(void)
128
129
opal_proc_local_set (& orte_process_info .super );
129
130
130
131
plm_in_use = false;
132
+
131
133
/* setup callback for SIGPIPE */
132
134
setup_sighandler (SIGPIPE , & epipe_handler , epipe_signal_callback );
133
135
/* Set signal handlers to catch kill signals so we can properly clean up
134
136
* after ourselves.
135
137
*/
136
138
setup_sighandler (SIGTERM , & term_handler , shutdown_signal );
137
139
setup_sighandler (SIGINT , & int_handler , shutdown_signal );
138
- /** setup callbacks for signals we should ignore */
139
- setup_sighandler (SIGUSR1 , & sigusr1_handler , signal_callback );
140
- setup_sighandler (SIGUSR2 , & sigusr2_handler , signal_callback );
140
+ /** setup callbacks for signals we should forward */
141
+ if (0 < (idx = opal_list_get_size (& orte_ess_base_signals ))) {
142
+ forward_signals_events = (opal_event_t * )malloc (sizeof (opal_event_t ) * idx );
143
+ if (NULL == forward_signals_events ) {
144
+ ret = ORTE_ERR_OUT_OF_RESOURCE ;
145
+ error = "unable to malloc" ;
146
+ goto error ;
147
+ }
148
+ idx = 0 ;
149
+ OPAL_LIST_FOREACH (sig , & orte_ess_base_signals , orte_ess_base_signal_t ) {
150
+ setup_sighandler (sig -> signal , forward_signals_events + idx , signal_forward_callback );
151
+ ++ idx ;
152
+ }
153
+ }
141
154
signals_set = true;
142
155
156
+
143
157
/* get the local topology */
144
158
if (NULL == opal_hwloc_topology ) {
145
159
if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology ())) {
@@ -653,14 +667,24 @@ int orte_ess_base_orted_setup(void)
653
667
654
668
int orte_ess_base_orted_finalize (void )
655
669
{
670
+ orte_ess_base_signal_t * sig ;
671
+ unsigned int i ;
672
+
656
673
if (signals_set ) {
657
- /* Release all local signal handlers */
658
674
opal_event_del (& epipe_handler );
659
675
opal_event_del (& term_handler );
660
676
opal_event_del (& int_handler );
661
- opal_event_signal_del (& sigusr1_handler );
662
- opal_event_signal_del (& sigusr2_handler );
677
+ /** Remove the handlers for the forwarded signals */
678
+ i = 0 ;
679
+ OPAL_LIST_FOREACH (sig , & orte_ess_base_signals , orte_ess_base_signal_t ) {
680
+ opal_event_signal_del (forward_signals_events + i );
681
+ ++ i ;
682
+ }
683
+ free (forward_signals_events );
684
+ forward_signals_events = NULL ;
685
+ signals_set = false;
663
686
}
687
+
664
688
/* cleanup */
665
689
if (NULL != log_path ) {
666
690
unlink (log_path );
@@ -717,7 +741,51 @@ static void epipe_signal_callback(int fd, short flags, void *arg)
717
741
return ;
718
742
}
719
743
720
- static void signal_callback (int fd , short event , void * arg )
744
+ /* Pass user signals to the local application processes */
745
+ static void signal_forward_callback (int fd , short event , void * arg )
721
746
{
722
- /* just ignore these signals */
747
+ opal_event_t * signal = (opal_event_t * )arg ;
748
+ int32_t signum , rc ;
749
+ opal_buffer_t * cmd ;
750
+ orte_daemon_cmd_flag_t command = ORTE_DAEMON_SIGNAL_LOCAL_PROCS ;
751
+ orte_jobid_t job = ORTE_JOBID_WILDCARD ;
752
+
753
+ signum = OPAL_EVENT_SIGNAL (signal );
754
+ if (!orte_execute_quiet ){
755
+ fprintf (stderr , "%s: Forwarding signal %d to job\n" ,
756
+ orte_basename , signum );
757
+ }
758
+
759
+ cmd = OBJ_NEW (opal_buffer_t );
760
+
761
+ /* pack the command */
762
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (cmd , & command , 1 , ORTE_DAEMON_CMD ))) {
763
+ ORTE_ERROR_LOG (rc );
764
+ OBJ_RELEASE (cmd );
765
+ return ;
766
+ }
767
+
768
+ /* pack the jobid */
769
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (cmd , & job , 1 , ORTE_JOBID ))) {
770
+ ORTE_ERROR_LOG (rc );
771
+ OBJ_RELEASE (cmd );
772
+ return ;
773
+ }
774
+
775
+ /* pack the signal */
776
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (cmd , & signum , 1 , OPAL_INT32 ))) {
777
+ ORTE_ERROR_LOG (rc );
778
+ OBJ_RELEASE (cmd );
779
+ return ;
780
+ }
781
+
782
+ /* send it to ourselves */
783
+ if (0 > (rc = orte_rml .send_buffer_nb (orte_mgmt_conduit ,
784
+ ORTE_PROC_MY_NAME , cmd ,
785
+ ORTE_RML_TAG_DAEMON ,
786
+ NULL , NULL ))) {
787
+ ORTE_ERROR_LOG (rc );
788
+ OBJ_RELEASE (cmd );
789
+ }
790
+
723
791
}
0 commit comments