From 8c4b0ecb90fde32c14b73af731201610cbbbb0a4 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 13 Jul 2018 20:39:03 -0700 Subject: [PATCH 1/4] Support PMIx versions > 2 Terminate configure with error if PMIx version is less than 1.2.0. Direct all versions above 2.x to the ext2x component. Signed-off-by: Ralph Castain --- config/opal_check_pmi.m4 | 62 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index b62327e3813..00f3e2cf6bb 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -13,7 +13,7 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. # Copyright (c) 2014-2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -251,7 +251,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ opal_external_have_pmix1=0 AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], [AC_MSG_RESULT([no]) - opal_external_pmix_happy=no], + opal_external_pmix_happy=no + opal_external_pmix_version=internal], [AC_MSG_RESULT([yes]) # check for external pmix lib */ @@ -266,15 +267,20 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ [AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir]) files=`ls $with_pmix_libdir/libpmix.* 2> /dev/null | wc -l` AS_IF([test "$files" -gt 0], - [pmix_ext_install_libdir=$with_pmix_libdir], - [AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib64]) + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$with_pmix_libdir], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib64]) files=`ls $with_pmix_libdir/lib64/libpmix.* 2> /dev/null | wc -l` AS_IF([test "$files" -gt 0], - [pmix_ext_install_libdir=$with_pmix_libdir/lib64], - [AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib]) + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$with_pmix_libdir/lib64], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib]) files=`ls $with_pmix_libdir/lib/libpmix.* 2> /dev/null | wc -l` AS_IF([test "$files" -gt 0], - [pmix_ext_install_libdir=$with_pmix_libdir/lib], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$with_pmix_libdir/lib], [AC_MSG_RESULT([not found]) AC_MSG_ERROR([Cannot continue])])])])], [# check for presence of lib64 directory - if found, see if the @@ -282,11 +288,14 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib64]) files=`ls $pmix_ext_install_dir/lib64/libpmix.* 2> /dev/null | wc -l` AS_IF([test "$files" -gt 0], - [pmix_ext_install_libdir=$pmix_ext_install_dir/lib64], - [AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib]) + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$pmix_ext_install_dir/lib64], + [AC_MSG_RESULT([not found]) + AC_MSG_CHECKING([libpmix.* in 
$pmix_ext_install_dir/lib]) files=`ls $pmix_ext_install_dir/lib/libpmix.* 2> /dev/null | wc -l` AS_IF([test "$files" -gt 0], - [pmix_ext_install_libdir=$pmix_ext_install_dir/lib], + [AC_MSG_RESULT([found]) + pmix_ext_install_libdir=$pmix_ext_install_dir/lib], [AC_MSG_RESULT([not found]) AC_MSG_ERROR([Cannot continue])])])]) @@ -301,33 +310,21 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS" AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"], [AC_MSG_RESULT([version file not found - assuming v1.1.4]) - opal_external_pmix_version_found=1 - opal_external_pmix_version=114 - opal_external_have_pmix1=1], + AC_MSG_WARN([External PMIx support requested, but version]) + AC_MSG_WARN([of the external lib is less than the minimum]) + AC_MSG_WARN([v1.2.x required by Open MPI.]) + AC_MSG_ERROR([cannot continue])], [AC_MSG_RESULT([version file found]) opal_external_pmix_version_found=0]) # if it does exist, then we need to parse it to find # the actual release series AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 3x]) - AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ - #include <pmix_version.h> - #if (PMIX_VERSION_MAJOR != 3L) - #error "not version 3" - #endif - ], [])], - [AC_MSG_RESULT([found]) - opal_external_pmix_version=3x - opal_external_pmix_version_found=1], - [AC_MSG_RESULT([not found])])]) - - AS_IF([test "$opal_external_pmix_version_found" = "0"], - [AC_MSG_CHECKING([version 2x]) + [AC_MSG_CHECKING([version 2x or above]) AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ #include <pmix_version.h> - #if (PMIX_VERSION_MAJOR != 2L) - #error "not version 2" + #if (PMIX_VERSION_MAJOR < 2L) + #error "not version 2 or above" #endif ], [])], [AC_MSG_RESULT([found]) @@ -339,7 +336,7 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ [AC_MSG_CHECKING([version 1x]) AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ #include <pmix_version.h> - #if (PMIX_VERSION_MAJOR != 1L) + #if (PMIX_VERSION_MAJOR != 1L && PMIX_VERSION_MINOR != 2L) #error "not version 1" #endif ], [])], @@ -367,5 
+364,10 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1], [Whether the external PMIx library is v1]) + + AS_IF([test "$opal_external_pmix_version" = "1x"], + [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [1.2.x: WARNING - DYNAMIC OPS NOT SUPPORTED])], + [OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [$opal_external_pmix_version])]) + OPAL_VAR_SCOPE_POP ]) From 87ddfdd1a5c942e501333b4490e94891b7717326 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Jul 2018 12:27:14 -0700 Subject: [PATCH 2/4] Update to PMIx v2.1.3 Signed-off-by: Ralph Castain (cherry picked from commit 6bee6c0e8e8aa49a243a14f5d85dc88846d00ad1) --- opal/mca/pmix/pmix2x/pmix/NEWS | 9 ++++++++- opal/mca/pmix/pmix2x/pmix/VERSION | 8 ++++---- opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec | 2 +- opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in | 3 +++ .../pmix/pmix2x/pmix/src/event/pmix_event_registration.c | 8 ++++---- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 2ca29705d1a..8c1add89935 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -21,7 +21,14 @@ example, a bug might be fixed in the master, and then moved to the current release as well as the "stable" bug fix release branch. 
-2.1.2 -- TBD +2.1.3 -- TBD +---------------------- +- Fixed memory corruption bug in event notification + system due to uninitialized variable +- Add numeric version definition + + +2.1.2 -- 6 July 2018 ---------------------- - Added PMIX_VERSION_RELEASE string to pmix_version.h - Added PMIX_SPAWNED and PMIX_PARENT_ID keys to all procs diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index f30d547eabd..b25d89c87b4 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -15,7 +15,7 @@ major=2 minor=1 -release=2 +release=3 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -23,7 +23,7 @@ release=2 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek= +greek=rc1 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jul 01, 2018" +date="Jul 19, 2018" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,6 +75,6 @@ date="Jul 01, 2018" # Version numbers are described in the Libtool current:revision:age # format. 
-libpmix_so_version=3:12:1 +libpmix_so_version=3:13:1 libpmi_so_version=1:0:0 libpmi2_so_version=1:0:0 diff --git a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec index ba9973b5f6c..9407541cc29 100644 --- a/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 2.1.2 +Version: 2.1.3rc1 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in b/opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in index a01adfc9f86..202a77a85dd 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in @@ -18,4 +18,7 @@ #define PMIX_VERSION_MAJOR @pmixmajor@ #define PMIX_VERSION_MINOR @pmixminor@ #define PMIX_VERSION_RELEASE @pmixrelease@ + +#define PMIX_NUMERIC_VERSION 0x00020103 + #endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index b208c326d88..b2f187c9166 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -391,11 +391,11 @@ static void check_cached_events(pmix_rshift_caddy_t *cd) chain->status = ncd->status; (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); chain->source.rank = pmix_globals.myid.rank; - /* we already left space for evhandler name plus - * a callback object when we cached the notification */ - chain->ninfo = ncd->ninfo; - PMIX_INFO_CREATE(chain->info, chain->ninfo); + /* we always leave space for event hdlr name and a callback object */ + chain->nallocated = ncd->ninfo + 2; + PMIX_INFO_CREATE(chain->info, chain->nallocated); if (0 < cd->ninfo) { + chain->ninfo = ncd->ninfo; /* need to copy the info */ for (n=0; n < 
ncd->ninfo; n++) { PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); From 646e9191972199700001a6a780974a61b8f426f8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Jul 2018 13:50:12 -0700 Subject: [PATCH 3/4] Protect against infinite loops Flag that we provided a notification and ignore it if it attempts to come back up. Signed-off-by: Ralph Castain (cherry picked from commit ea0d70bc9396def61545e2ce492a55c4c3aa7772) --- orte/orted/pmix/pmix_server_gen.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index ea2cf4fb3b6..4fd2419c740 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -355,6 +355,17 @@ void pmix_server_notify(int status, orte_process_name_t* sender, } } + /* protect against infinite loops by marking that this notification was + * passed down to the server by me */ + if (NULL == cd->info) { + cd->info = OBJ_NEW(opal_list_t); + } + val = OBJ_NEW(opal_value_t); + val->key = strdup("orte.notify.donotloop"); + val->type = OPAL_BOOL; + val->data.flag = true; + opal_list_append(cd->info, &val->super); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s NOTIFYING PMIX SERVER OF STATUS %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret); @@ -381,6 +392,14 @@ int pmix_server_notify_event(int code, opal_process_name_t *source, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(source), code); + /* check to see if this is one we sent down */ + OPAL_LIST_FOREACH(val, info, opal_value_t) { + if (0 == strcmp(val->key, "orte.notify.donotloop")) { + /* yep - do not process */ + goto done; + } + } + /* a local process has generated an event - we need to xcast it * to all the daemons so it can be passed down to their local * procs */ @@ -447,6 +466,7 @@ int pmix_server_notify_event(int code, opal_process_name_t *source, /* maintain accounting */ OBJ_RELEASE(sig); + done: /* execute the callback */ if (NULL != cbfunc) { 
cbfunc(ORTE_SUCCESS, cbdata); From 83912e522e7c94550b189c66530c44d893047054 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Jul 2018 13:52:04 -0700 Subject: [PATCH 4/4] Pickup minor update from PMIx v2.1.3 Signed-off-by: Ralph Castain (cherry picked from commit 0ef52f2dc428128c2a1a46ec9c79c3080fea58ca) --- .../pmix2x/pmix/src/event/pmix_event_notification.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index eec17db8171..1065ad0c533 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -198,8 +198,8 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - chain->nondefault = true; + cd->nondefault = PMIX_INFO_TRUE(&info[n]); + chain->nondefault = cd->nondefault; } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { /* provides an array of pmix_proc_t identifying the procs * that are to receive this notification, or a single pmix_proc_t */ @@ -972,8 +972,8 @@ static void _notify_client_event(int sd, short args, void *cbdata) for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&chain->info[n], &cd->info[n]); if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - chain->nondefault = true; + cd->nondefault = PMIX_INFO_TRUE(&cd->info[n]); + chain->nondefault = cd->nondefault; } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { /* provides an array of pmix_proc_t identifying the procs * that are to receive this notification, or a single pmix_proc_t */ @@ -1087,7 +1087,7 @@ pmix_status_t 
pmix_server_notify_client_of_event(pmix_status_t status, if (NULL != info) { for (n=0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; + cd->nondefault = PMIX_INFO_TRUE(&info[n]); } else if (0 == strncmp(info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { /* provides an array of pmix_proc_t identifying the procs * that are to receive this notification, or a single pmix_proc_t */