1111 * All rights reserved.
1212 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
1313 * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
14+ * Copyright (c) 2017 IBM Corporation. All rights reserved.
1415 * $COPYRIGHT$
1516 *
1617 * Additional copyrights may follow
2425#ifdef HAVE_UNISTD_H
2526#include <unistd.h>
2627#endif
28+ #ifdef HAVE_SYS_TYPES_H
29+ #include <sys/types.h>
30+ #endif
31+ #ifdef HAVE_SYS_STAT_H
32+ #include <sys/stat.h>
33+ #endif
34+ #ifdef HAVE_SYS_FCNTL_H
35+ #include <fcntl.h>
36+ #endif
2737
2838#include <string.h>
2939#include <signal.h>
3444#include "opal/util/output.h"
3545#include "opal/util/show_help.h"
3646#include "opal/util/argv.h"
47+ #include "opal/util/proc.h"
3748#include "opal/runtime/opal_params.h"
3849
3950#ifndef _NSIG
4253
4354#define HOSTFORMAT "[%s:%05d] "
4455
56+ int opal_stacktrace_output_fileno = -1 ;
57+ static char * opal_stacktrace_output_filename_base = NULL ;
58+ static size_t opal_stacktrace_output_filename_max_len = 0 ;
4559static char stacktrace_hostname [OPAL_MAXHOSTNAMELEN ];
4660static char * unable_to_print_msg = "Unable to print stack trace!\n" ;
4761
62+ /*
63+ * Set the stacktrace filename:
64+ * stacktrace.PID
65+ * -or, if VPID is available-
66+ * stacktrace.VPID.PID
67+ */
68+ static void set_stacktrace_filename (void ) {
69+ opal_proc_t * my_proc = opal_proc_local_get ();
70+
71+ if ( NULL == my_proc ) {
72+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
73+ "%s.%lu" ,
74+ opal_stacktrace_output_filename_base , (unsigned long )getpid ());
75+ }
76+ else {
77+ snprintf (opal_stacktrace_output_filename , opal_stacktrace_output_filename_max_len ,
78+ "%s.%lu.%lu" ,
79+ opal_stacktrace_output_filename_base , (unsigned long )my_proc -> proc_name .vpid , (unsigned long )getpid ());
80+ }
81+
82+ return ;
83+ }
84+
4885/**
4986 * This function is being called as a signal-handler in response
5087 * to a user-specified signal (e.g. SIGFPE or SIGSEGV).
@@ -68,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
68105 int ret ;
69106 char * si_code_str = "" ;
70107
108+ /* Do not print the stack trace */
109+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
110+ /* Raise the signal again, so we don't accidentally mask critical signals.
111+ * For critical signals, it is preferred that we call 'raise' instead of
112+ * 'exit' or 'abort' so that the return status is set properly for this
113+ * process.
114+ */
115+ signal (signo , SIG_DFL );
116+ raise (signo );
117+
118+ return ;
119+ }
120+
121+ /* Update the file name with the RANK, if available */
122+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
123+ set_stacktrace_filename ();
124+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
125+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
126+ if ( 0 > opal_stacktrace_output_fileno ) {
127+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
128+ opal_stacktrace_output_filename , strerror (errno ));
129+ opal_stacktrace_output_fileno = fileno (stderr );
130+ }
131+ }
132+
71133 /* write out the footer information */
72134 memset (print_buffer , 0 , sizeof (print_buffer ));
73135 ret = snprintf (print_buffer , sizeof (print_buffer ),
74136 HOSTFORMAT "*** Process received signal ***\n" ,
75137 stacktrace_hostname , getpid ());
76- write (fileno ( stderr ) , print_buffer , ret );
138+ write (opal_stacktrace_output_fileno , print_buffer , ret );
77139
78140
79141 memset (print_buffer , 0 , sizeof (print_buffer ));
@@ -323,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
323385 }
324386
325387 /* write out the signal information generated above */
326- write (fileno ( stderr ) , print_buffer , sizeof (print_buffer )- size );
388+ write (opal_stacktrace_output_fileno , print_buffer , sizeof (print_buffer )- size );
327389
328390 /* print out the stack trace */
329391 snprintf (print_buffer , sizeof (print_buffer ), HOSTFORMAT ,
330392 stacktrace_hostname , getpid ());
331- ret = opal_backtrace_print (stderr , print_buffer , 2 );
393+ ret = opal_backtrace_print (NULL , print_buffer , 2 );
332394 if (OPAL_SUCCESS != ret ) {
333- write (fileno ( stderr ) , unable_to_print_msg , strlen (unable_to_print_msg ));
395+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
334396 }
335397
336398 /* write out the footer information */
@@ -339,10 +401,24 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
339401 HOSTFORMAT "*** End of error message ***\n" ,
340402 stacktrace_hostname , getpid ());
341403 if (ret > 0 ) {
342- write (fileno ( stderr ) , print_buffer , ret );
404+ write (opal_stacktrace_output_fileno , print_buffer , ret );
343405 } else {
344- write (fileno (stderr ), unable_to_print_msg , strlen (unable_to_print_msg ));
406+ write (opal_stacktrace_output_fileno , unable_to_print_msg , strlen (unable_to_print_msg ));
407+ }
408+
409+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
410+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
411+ close (opal_stacktrace_output_fileno );
412+ opal_stacktrace_output_fileno = -1 ;
345413 }
414+
415+ /* Raise the signal again, so we don't accidentally mask critical signals.
416+ * For critical signals, it is preferred that we call 'raise' instead of
417+ * 'exit' or 'abort' so that the return status is set properly for this
418+ * process.
419+ */
420+ signal (signo , SIG_DFL );
421+ raise (signo );
346422}
347423
348424#endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
@@ -364,7 +440,30 @@ void opal_stackframe_output(int stream)
364440 opal_output (stream , "%s" , traces [i ]);
365441 }
366442 } else {
367- opal_backtrace_print (stderr , NULL , 2 );
443+ /* Do not print the stack trace */
444+ if ( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
445+ return ;
446+ }
447+
448+ /* Update the file name with the RANK, if available */
449+ if ( 0 < opal_stacktrace_output_filename_max_len ) {
450+ set_stacktrace_filename ();
451+ opal_stacktrace_output_fileno = open (opal_stacktrace_output_filename ,
452+ O_CREAT |O_WRONLY |O_TRUNC , S_IRUSR |S_IWUSR );
453+ if ( 0 > opal_stacktrace_output_fileno ) {
454+ opal_output (0 , "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s" ,
455+ opal_stacktrace_output_filename , strerror (errno ));
456+ opal_stacktrace_output_fileno = fileno (stderr );
457+ }
458+ }
459+
460+ opal_backtrace_print (NULL , NULL , 2 );
461+
462+ if ( fileno (stdout ) != opal_stacktrace_output_fileno &&
463+ fileno (stderr ) != opal_stacktrace_output_fileno ) {
464+ close (opal_stacktrace_output_fileno );
465+ opal_stacktrace_output_fileno = -1 ;
466+ }
368467 }
369468}
370469
@@ -435,6 +534,50 @@ int opal_util_register_stackhandlers (void)
435534 }
436535 }
437536
537+ /* Setup the output stream to use */
538+ if ( NULL == opal_stacktrace_output_filename ||
539+ 0 == strcasecmp (opal_stacktrace_output_filename , "none" ) ) {
540+ opal_stacktrace_output_fileno = -1 ;
541+ }
542+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stdout" ) ) {
543+ opal_stacktrace_output_fileno = fileno (stdout );
544+ }
545+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "stderr" ) ) {
546+ opal_stacktrace_output_fileno = fileno (stdout );
547+ }
548+ else if ( 0 == strcasecmp (opal_stacktrace_output_filename , "file" ) ||
549+ 0 == strcasecmp (opal_stacktrace_output_filename , "file:" ) ) {
550+ opal_stacktrace_output_filename_base = strdup ("stacktrace" );
551+
552+ free (opal_stacktrace_output_filename );
553+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
554+ opal_stacktrace_output_filename_max_len = strlen ("stacktrace" ) + 8 + 8 ;
555+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
556+ set_stacktrace_filename ();
557+ opal_stacktrace_output_fileno = -1 ;
558+ }
559+ else if ( 0 == strncasecmp (opal_stacktrace_output_filename , "file:" , 5 ) ) {
560+ char * filename_cpy = NULL ;
561+ next = strchr (opal_stacktrace_output_filename , ':' );
562+ next ++ ; // move past the ':' to the filename specified
563+
564+ opal_stacktrace_output_filename_base = strdup (next );
565+
566+ free (opal_stacktrace_output_filename );
567+ // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
568+ opal_stacktrace_output_filename_max_len = strlen (opal_stacktrace_output_filename_base ) + 8 + 8 ;
569+ opal_stacktrace_output_filename = (char * )malloc (sizeof (char ) * opal_stacktrace_output_filename_max_len );
570+ set_stacktrace_filename ();
571+ opal_stacktrace_output_fileno = -1 ;
572+
573+ free (filename_cpy );
574+ }
575+ else {
576+ opal_stacktrace_output_fileno = fileno (stderr );
577+ }
578+
579+
580+ /* Setup the signals to catch */
438581 memset (& act , 0 , sizeof (act ));
439582 act .sa_sigaction = show_stackframe ;
440583 act .sa_flags = SA_SIGINFO ;
0 commit comments