2525#ifdef HAVE_UNISTD_H
2626#include <unistd.h>
2727#endif
28+ #ifdef HAVE_SYS_TYPES_H
29+ #include <sys/types.h>
30+ #endif
31+ #ifdef HAVE_SYS_STAT_H
32+ #include <sys/stat.h>
33+ #endif
34+ #ifdef HAVE_SYS_FCNTL_H
35+ #include <fcntl.h>
36+ #endif
2837
2938#include <string.h>
3039#include <signal.h>
3544#include "opal/util/output.h"
3645#include "opal/util/show_help.h"
3746#include "opal/util/argv.h"
47+ #include "opal/util/proc.h"
3848#include "opal/runtime/opal_params.h"
3949
4050#ifndef _NSIG
4353
4454#define HOSTFORMAT "[%s:%05d] "
4555
56+ int opal_stacktrace_output_fileno = -1 ;
57+ static char * opal_stacktrace_output_filename_base = NULL ;
58+ static size_t opal_stacktrace_output_filename_max_len = 0 ;
4659static char stacktrace_hostname [OPAL_MAXHOSTNAMELEN ];
4760static char * unable_to_print_msg = "Unable to print stack trace!\n" ;
4861
62+ /*
63+ * Set the stacktrace filename:
64+ * stacktrace.PID
65+ * -or, if VPID is available-
66+ * stacktrace.VPID.PID
67+ */
68+ static void set_stacktrace_filename (void ) {
69+ opal_proc_t * my_proc = opal_proc_local_get ();
70+
71+ if ( NULL == my_proc ) {
72+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
73+ "%s.%lu" ,
74+ opal_stacktrace_output_filename_base , (unsigned long )getpid ());
75+ }
76+ else {
77+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
78+ "%s.%lu.%lu" ,
79+ opal_stacktrace_output_filename_base , (unsigned long )my_proc -> proc_name .vpid , (unsigned long )getpid ());
80+ }
81+
82+ return ;
83+ }
84+
4985/**
5086 * This function is being called as a signal-handler in response
5187 * to a user-specified signal (e.g. SIGFPE or SIGSEGV).
@@ -69,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
69105 int ret ;
70106 char * si_code_str = "" ;
71107
108+ /* Do not print the stack trace */
109+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
110+ /* Raise the signal again, so we don't accidentally mask critical signals.
111+ * For critical signals, it is preferred that we call 'raise' instead of
112+ * 'exit' or 'abort' so that the return status is set properly for this
113+ * process.
114+ */
115+ signal (signo , SIG_DFL );
116+ raise (signo );
117+
118+ return ;
119+ }
120+
121+ /* Update the file name with the RANK, if available */
122+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
123+ set_stacktrace_filename ();
124+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
125+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
126+ if ( 0 > opal_stacktrace_output_fileno ) {
127+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
128+ opal_stacktrace_output_filename , strerror (errno ));
129+ opal_stacktrace_output_fileno = fileno (stderr );
130+ }
131+ }
132+
72133 /* write out the header information */
73134 memset (print_buffer , 0 , sizeof (print_buffer ));
74135 ret = snprintf (print_buffer , sizeof (print_buffer ),
75136 HOSTFORMAT "*** Process received signal ***\n" ,
76137 stacktrace_hostname , getpid ());
77- write (fileno ( stderr ) , print_buffer , ret );
138+ write (opal_stacktrace_output_fileno , print_buffer , ret );
78139
79140
80141 memset (print_buffer , 0 , sizeof (print_buffer ));
@@ -324,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
324385 }
325386
326387 /* write out the signal information generated above */
327- write (fileno ( stderr ) , print_buffer , sizeof (print_buffer )- size );
388+ write (opal_stacktrace_output_fileno , print_buffer , sizeof (print_buffer )- size );
328389
329390 /* print out the stack trace */
330391 snprintf (print_buffer , sizeof (print_buffer ), HOSTFORMAT ,
331392 stacktrace_hostname , getpid ());
332- ret = opal_backtrace_print (stderr , print_buffer , 2 );
393+ ret = opal_backtrace_print (NULL , print_buffer , 2 );
333394 if (OPAL_SUCCESS != ret ) {
334- write (fileno ( stderr ) , unable_to_print_msg , strlen (unable_to_print_msg ));
395+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
335396 }
336397
337398 /* write out the footer information */
@@ -340,9 +401,15 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
340401 HOSTFORMAT "*** End of error message ***\n" ,
341402 stacktrace_hostname , getpid ());
342403 if (ret > 0 ) {
343- write (fileno ( stderr ) , print_buffer , ret );
404+ write (opal_stacktrace_output_fileno , print_buffer , ret );
344405 } else {
345- write (fileno (stderr ), unable_to_print_msg , strlen (unable_to_print_msg ));
406+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
407+ }
408+
409+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
410+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
411+ close (opal_stacktrace_output_fileno );
412+ opal_stacktrace_output_fileno = -1 ;
346413 }
347414
348415 /* Raise the signal again, so we don't accidentally mask critical signals.
@@ -373,7 +440,30 @@ void opal_stackframe_output(int stream)
373440 opal_output (stream , "%s" , traces [i ]);
374441 }
375442 } else {
376- opal_backtrace_print (stderr , NULL , 2 );
443+ /* Do not print the stack trace */
444+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
445+ return ;
446+ }
447+
448+ /* Update the file name with the RANK, if available */
449+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
450+ set_stacktrace_filename ();
451+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
452+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
453+ if ( 0 > opal_stacktrace_output_fileno ) {
454+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
455+ opal_stacktrace_output_filename , strerror (errno ));
456+ opal_stacktrace_output_fileno = fileno (stderr );
457+ }
458+ }
459+
460+ opal_backtrace_print (NULL , NULL , 2 );
461+
462+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
463+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
464+ close (opal_stacktrace_output_fileno );
465+ opal_stacktrace_output_fileno = -1 ;
466+ }
377467 }
378468}
379469
@@ -444,6 +534,50 @@ int opal_util_register_stackhandlers (void)
444534 }
445535 }
446536
537+ /* Setup the output stream to use */
538+ if ( NULL == opal_stacktrace_output_filename ||
539+ 0 == strcasecmp (opal_stacktrace_output_filename , "none" ) ) {
540+ opal_stacktrace_output_fileno = -1 ;
541+ }
542+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stdout" ) ) {
543+ opal_stacktrace_output_fileno = fileno (stdout );
544+ }
545+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stderr" ) ) {
546+ opal_stacktrace_output_fileno = fileno (stdout );
547+ }
548+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "file" ) ||
549+ 0 == strcasecmp (opal_stacktrace_output_filename , "file:" ) ) {
550+ opal_stacktrace_output_filename_base = strdup ("stacktrace" );
551+
552+ free (opal_stacktrace_output_filename );
553+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
554+ opal_stacktrace_output_filename_max_len = strlen ("stacktrace" ) + 8 + 8 ;
555+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
556+ set_stacktrace_filename ();
557+ opal_stacktrace_output_fileno = -1 ;
558+ }
559+ else if ( 0 == strncasecmp (opal_stacktrace_output_filename , "file:" , 5 ) ) {
560+ char * filename_cpy = NULL ;
561+ next = strchr (opal_stacktrace_output_filename , ':' );
562+ next ++ ; // move past the ':' to the filename specified
563+
564+ opal_stacktrace_output_filename_base = strdup (next );
565+
566+ free (opal_stacktrace_output_filename );
567+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
568+ opal_stacktrace_output_filename_max_len = strlen (opal_stacktrace_output_filename_base ) + 8 + 8 ;
569+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
570+ set_stacktrace_filename ();
571+ opal_stacktrace_output_fileno = -1 ;
572+
573+ free (filename_cpy );
574+ }
575+ else {
576+ opal_stacktrace_output_fileno = fileno (stderr );
577+ }
578+
579+
580+ /* Setup the signals to catch */
447581 memset (& act , 0 , sizeof (act ));
448582 act .sa_sigaction = show_stackframe ;
449583 act .sa_flags = SA_SIGINFO ;
0 commit comments