diff --git a/ompi/mpi/java/c/mpi_MPI.c b/ompi/mpi/java/c/mpi_MPI.c index 3c2a22a23a..f239e0a663 100644 --- a/ompi/mpi/java/c/mpi_MPI.c +++ b/ompi/mpi/java/c/mpi_MPI.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -135,6 +135,10 @@ OBJ_CLASS_INSTANCE(ompi_java_buffer_t, */ jint JNI_OnLoad(JavaVM *vm, void *reserved) { + // Ensure that PSM/PSM2 signal hijacking is disabled *before* loading + // the library (see the comment in opal_init_psm() for more detail). + opal_init_psm(); + libmpi = dlopen("libmpi." OPAL_DYN_LIB_SUFFIX, RTLD_NOW | RTLD_GLOBAL); #if defined(HAVE_DL_INFO) && defined(HAVE_LIBGEN_H) diff --git a/opal/runtime/opal.h b/opal/runtime/opal.h index ccec2f7fcf..df663a87d9 100644 --- a/opal/runtime/opal.h +++ b/opal/runtime/opal.h @@ -6,18 +6,19 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ - * + * Additional copyrights may follow - * + * $HEADER$ */ @@ -53,7 +54,7 @@ OPAL_DECLSPEC extern int opal_cache_line_size; OPAL_DECLSPEC int opal_init(int* pargc, char*** pargv); /** - * Finalize the OPAL layer, including the MCA system. + * Finalize the OPAL layer, including the MCA system. * * @retval OPAL_SUCCESS Upon success. * @retval OPAL_ERROR Upon failure. @@ -75,7 +76,14 @@ OPAL_DECLSPEC int opal_finalize(void); OPAL_DECLSPEC int opal_init_util(int* pargc, char*** pargv); /** - * Finalize the OPAL layer, excluding the MCA system. + * Disable PSM/PSM2 signal hijacking. + * + * See the comment in opal_init_psm() (opal_init.c) for more detail. + */ +OPAL_DECLSPEC int opal_init_psm(void); + +/** + * Finalize the OPAL layer, excluding the MCA system. * * @retval OPAL_SUCCESS Upon success. * @retval OPAL_ERROR Upon failure. diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 46a2fa1d08..2c623e71b7 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2013 Los Alamos National Security, LLC. @@ -246,6 +246,34 @@ opal_err2str(int errnum, const char **errmsg) } +int opal_init_psm(void) +{ + /* Very early in the init sequence -- before *ANY* MCA components + are opened -- we need to disable some behavior from the PSM and + PSM2 libraries (by default): at least some old versions of + these libraries hijack signal handlers during their library + constructors and then do not un-hijack them when the libraries + are unloaded.
+ + It is a bit of an abstraction break that we have to put + vendor/transport-specific code in the OPAL core, but we're + out of options, unfortunately. + + NOTE: We only disable this behavior if the corresponding + environment variables are not already set (i.e., if the + user/environment has indicated a preference for this behavior, + we won't override it). */ + if (NULL == getenv("IPATH_NO_BACKTRACE")) { + opal_setenv("IPATH_NO_BACKTRACE", "1", true, &environ); + } + if (NULL == getenv("HFI_NO_BACKTRACE")) { + opal_setenv("HFI_NO_BACKTRACE", "1", true, &environ); + } + + return OPAL_SUCCESS; +} + + int opal_init_util(int* pargc, char*** pargv) { @@ -301,6 +329,10 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + // Disable PSM/PSM2 signal hijacking (see the comment in + // opal_init_psm() for more details) + opal_init_psm(); + /* Setup the parameter system */ if (OPAL_SUCCESS != (ret = mca_base_param_init())) { error = "mca_base_param_init";