Skip to content

Commit 39f8903

Browse files
authored
Merge pull request #5429 from rhc54/cmr31/px
v3.1.x: Support PMIx versions > 2
2 parents e70006b + 0ef52f2 commit 39f8903

File tree

9 files changed

+84
-49
lines changed

9 files changed

+84
-49
lines changed

config/opal_check_pmi.m4

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
1414
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
1515
# reserved.
16-
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
16+
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
1717
# Copyright (c) 2014-2018 Research Organization for Information Science
1818
# and Technology (RIST). All rights reserved.
1919
# Copyright (c) 2016 IBM Corporation. All rights reserved.
@@ -253,7 +253,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
253253
AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"],
254254
[AC_MSG_RESULT([no])
255255
opal_external_pmix_happy=no
256-
opal_prun_happy=yes],
256+
opal_prun_happy=yes
257+
opal_external_pmix_version=internal],
257258
258259
[AC_MSG_RESULT([yes])
259260
# check for external pmix lib
@@ -272,23 +273,29 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
272273
[AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib64])
273274
files=`ls $with_pmix_libdir/lib64/libpmix.* 2> /dev/null | wc -l`
274275
AS_IF([test "$files" -gt 0],
275-
[pmix_ext_install_libdir=$with_pmix_libdir/lib64],
276-
[AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib])
276+
[AC_MSG_RESULT([found])
277+
pmix_ext_install_libdir=$with_pmix_libdir/lib64],
278+
[AC_MSG_RESULT([not found])
279+
AC_MSG_CHECKING([libpmix.* in $with_pmix_libdir/lib])
277280
files=`ls $with_pmix_libdir/lib/libpmix.* 2> /dev/null | wc -l`
278281
AS_IF([test "$files" -gt 0],
279-
[pmix_ext_install_libdir=$with_pmix_libdir/lib],
282+
[AC_MSG_RESULT([found])
283+
pmix_ext_install_libdir=$with_pmix_libdir/lib],
280284
[AC_MSG_RESULT([not found])
281285
AC_MSG_ERROR([Cannot continue])])])])],
282286
[# check for presence of lib64 directory - if found, see if the
283287
# desired library is present and matches our build requirements
284288
AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib64])
285289
files=`ls $pmix_ext_install_dir/lib64/libpmix.* 2> /dev/null | wc -l`
286290
AS_IF([test "$files" -gt 0],
287-
[pmix_ext_install_libdir=$pmix_ext_install_dir/lib64],
288-
[AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib])
291+
[AC_MSG_RESULT([found])
292+
pmix_ext_install_libdir=$pmix_ext_install_dir/lib64],
293+
[AC_MSG_RESULT([not found])
294+
AC_MSG_CHECKING([libpmix.* in $pmix_ext_install_dir/lib])
289295
files=`ls $pmix_ext_install_dir/lib/libpmix.* 2> /dev/null | wc -l`
290296
AS_IF([test "$files" -gt 0],
291-
[pmix_ext_install_libdir=$pmix_ext_install_dir/lib],
297+
[AC_MSG_RESULT([found])
298+
pmix_ext_install_libdir=$pmix_ext_install_dir/lib],
292299
[AC_MSG_RESULT([not found])
293300
AC_MSG_ERROR([Cannot continue])])])])
294301
@@ -303,33 +310,22 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
303310
CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS"
304311
AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"],
305312
[AC_MSG_RESULT([version file not found - assuming v1.1.4])
306-
opal_external_pmix_version_found=1
307-
opal_external_pmix_version=114
308-
opal_external_have_pmix1=1],
313+
# we don't support anything earlier than 1.2.x
314+
AC_MSG_WARN([External PMIx support requested, but version])
315+
AC_MSG_WARN([of the external lib is less than the minimum])
316+
AC_MSG_WARN([v1.2.x required by Open MPI.])
317+
AC_MSG_ERROR([cannot continue])],
309318
[AC_MSG_RESULT([version file found])
310319
opal_external_pmix_version_found=0])
311320
312321
# if it does exist, then we need to parse it to find
313322
# the actual release series
314323
AS_IF([test "$opal_external_pmix_version_found" = "0"],
315-
[AC_MSG_CHECKING([version 3x])
324+
[AC_MSG_CHECKING([version 2x or above])
316325
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
317326
#include <pmix_version.h>
318-
#if (PMIX_VERSION_MAJOR != 3L)
319-
#error "not version 3"
320-
#endif
321-
], [])],
322-
[AC_MSG_RESULT([found])
323-
opal_external_pmix_version=3x
324-
opal_external_pmix_version_found=1],
325-
[AC_MSG_RESULT([not found])])])
326-
327-
AS_IF([test "$opal_external_pmix_version_found" = "0"],
328-
[AC_MSG_CHECKING([version 2x])
329-
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
330-
#include <pmix_version.h>
331-
#if (PMIX_VERSION_MAJOR != 2L)
332-
#error "not version 2"
327+
#if (PMIX_VERSION_MAJOR < 2L)
328+
#error "not version 2 or above"
333329
#endif
334330
], [])],
335331
[AC_MSG_RESULT([found])
@@ -342,8 +338,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
342338
[AC_MSG_CHECKING([version 1x])
343339
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
344340
#include <pmix_version.h>
345-
#if (PMIX_VERSION_MAJOR != 1L)
346-
#error "not version 1"
341+
#if (PMIX_VERSION_MAJOR != 1L && PMIX_VERSION_MINOR != 2L)
342+
#error "not version 1.2.x"
347343
#endif
348344
], [])],
349345
[AC_MSG_RESULT([found])
@@ -354,8 +350,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
354350
355351
AS_IF([test "x$opal_external_pmix_version" = "x"],
356352
[AC_MSG_WARN([External PMIx support requested, but version])
357-
AC_MSG_WARN([information of the external lib could not])
358-
AC_MSG_WARN([be detected])
353+
AC_MSG_WARN([of the external lib is less than the minimum])
354+
AC_MSG_WARN([v1.2.x required by Open MPI])
359355
AC_MSG_ERROR([cannot continue])])
360356
361357
CPPFLAGS=$opal_external_pmix_save_CPPFLAGS
@@ -371,5 +367,10 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
371367
AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1],
372368
[Whether the external PMIx library is v1])
373369
AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = "yes"])
370+
371+
AS_IF([test "$opal_external_pmix_version" = "1x"],
372+
[OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [1.2.x: WARNING - DYNAMIC OPS NOT SUPPORTED])],
373+
[OPAL_SUMMARY_ADD([[Miscellaneous]],[[PMIx support]], [opal_pmix], [$opal_external_pmix_version])])
374+
374375
OPAL_VAR_SCOPE_POP
375376
])

opal/mca/pmix/pmix2x/pmix/NEWS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ example, a bug might be fixed in the master, and then moved to the
2121
current release as well as the "stable" bug fix release branch.
2222

2323

24+
2.1.3 -- TBD
25+
----------------------
26+
- Fixed memory corruption bug in event notification
27+
system due to uninitialized variable
28+
- Added numeric version definition
29+
30+
2431
2.1.2 -- 6 July 2018
2532
----------------------
2633
- Added PMIX_VERSION_RELEASE string to pmix_version.h

opal/mca/pmix/pmix2x/pmix/VERSION

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,22 @@
1515

1616
major=2
1717
minor=1
18-
release=2
18+
release=3
1919

2020
# greek is used for alpha or beta release tags. If it is non-empty,
2121
# it will be appended to the version number. It does not have to be
2222
# numeric. Common examples include a1 (alpha release 1) or b1 (beta release 1).
2323
# The only requirement is that it must be entirely printable ASCII
2424
# characters and have no white space.
2525

26-
greek=
26+
greek=rc1
2727

2828
# If repo_rev is empty, then the repository version number will be
2929
# obtained during "make dist" via the "git describe --tags --always"
3030
# command, or with the date (if "git describe" fails) in the form of
3131
# "date<date>".
3232

33-
repo_rev=git8b0bc1f
33+
repo_rev=git1b0b577
3434

3535
# If tarball_version is not empty, it is used as the version string in
3636
# the tarball filename, regardless of all other versions listed in
@@ -44,7 +44,7 @@ tarball_version=
4444

4545
# The date when this release was created
4646

47-
date="Jul 06, 2018"
47+
date="Jul 19, 2018"
4848

4949
# The shared library version of each of PMIx's public libraries.
5050
# These versions are maintained in accordance with the "Library
@@ -75,6 +75,6 @@ date="Jul 06, 2018"
7575
# Version numbers are described in the Libtool current:revision:age
7676
# format.
7777

78-
libpmix_so_version=3:12:1
78+
libpmix_so_version=3:13:1
7979
libpmi_so_version=1:0:0
8080
libpmi2_so_version=1:0:0

opal/mca/pmix/pmix2x/pmix/contrib/pmix.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@
192192

193193
Summary: An extended/exascale implementation of PMI
194194
Name: %{?_name:%{_name}}%{!?_name:pmix}
195-
Version: 2.1.2
195+
Version: 2.1.3rc1
196196
Release: 1%{?dist}
197197
License: BSD
198198
Group: Development/Libraries

opal/mca/pmix/pmix2x/pmix/include/pmix_version.h.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* Copyright (c) 2016 Mellanox Technologies, Inc.
33
* All rights reserved.
44
* Copyright (c) 2018 IBM Corporation. All rights reserved.
5+
* Copyright (c) 2018 Intel, Inc. All rights reserved.
56
* $COPYRIGHT$
67
*
78
* Additional copyrights may follow
@@ -17,4 +18,7 @@
1718
#define PMIX_VERSION_MAJOR @pmixmajor@
1819
#define PMIX_VERSION_MINOR @pmixminor@
1920
#define PMIX_VERSION_RELEASE @pmixrelease@
21+
22+
#define PMIX_NUMERIC_VERSION 0x00020103
23+
2024
#endif

opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ static pmix_status_t notify_server_of_event(pmix_status_t status,
198198
for (n=0; n < cd->ninfo; n++) {
199199
PMIX_INFO_XFER(&cd->info[n], &chain->info[n]);
200200
if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
201-
cd->nondefault = true;
202-
chain->nondefault = true;
201+
cd->nondefault = PMIX_INFO_TRUE(&info[n]);
202+
chain->nondefault = cd->nondefault;
203203
} else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) {
204204
/* provides an array of pmix_proc_t identifying the procs
205205
* that are to receive this notification, or a single pmix_proc_t */
@@ -972,8 +972,8 @@ static void _notify_client_event(int sd, short args, void *cbdata)
972972
for (n=0; n < cd->ninfo; n++) {
973973
PMIX_INFO_XFER(&chain->info[n], &cd->info[n]);
974974
if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
975-
cd->nondefault = true;
976-
chain->nondefault = true;
975+
cd->nondefault = PMIX_INFO_TRUE(&cd->info[n]);
976+
chain->nondefault = cd->nondefault;
977977
} else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) {
978978
/* provides an array of pmix_proc_t identifying the procs
979979
* that are to receive this notification, or a single pmix_proc_t */
@@ -1087,7 +1087,7 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status,
10871087
if (NULL != info) {
10881088
for (n=0; n < ninfo; n++) {
10891089
if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) {
1090-
cd->nondefault = true;
1090+
cd->nondefault = PMIX_INFO_TRUE(&info[n]);
10911091
} else if (0 == strncmp(info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) {
10921092
/* provides an array of pmix_proc_t identifying the procs
10931093
* that are to receive this notification, or a single pmix_proc_t */

opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
44
* Copyright (c) 2017 Research Organization for Information Science
55
* and Technology (RIST). All rights reserved.
66
* $COPYRIGHT$
@@ -391,11 +391,11 @@ static void check_cached_events(pmix_rshift_caddy_t *cd)
391391
chain->status = ncd->status;
392392
(void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
393393
chain->source.rank = pmix_globals.myid.rank;
394-
/* we already left space for evhandler name plus
395-
* a callback object when we cached the notification */
396-
chain->ninfo = ncd->ninfo;
397-
PMIX_INFO_CREATE(chain->info, chain->ninfo);
394+
/* we always leave space for event hdlr name and a callback object */
395+
chain->nallocated = ncd->ninfo + 2;
396+
PMIX_INFO_CREATE(chain->info, chain->nallocated);
398397
if (0 < cd->ninfo) {
398+
chain->ninfo = ncd->ninfo;
399399
/* need to copy the info */
400400
for (n=0; n < ncd->ninfo; n++) {
401401
PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]);

opal/mca/pmix/pmix2x/pmix2x.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,13 +317,16 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
317317
}
318318

319319
/* convert the array of info */
320-
if (NULL != info) {
320+
if (NULL != info && 0 < ninfo) {
321321
cd->info = OBJ_NEW(opal_list_t);
322322
for (n=0; n < ninfo; n++) {
323323
iptr = OBJ_NEW(opal_value_t);
324+
/* ensure that this key is NUL-terminated */
325+
info[n].key[PMIX_MAX_KEYLEN] = '\0';
324326
iptr->key = strdup(info[n].key);
325327
if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) {
326328
OPAL_ERROR_LOG(rc);
329+
opal_output(0, "KEY %s FAILED VALUE TRANSLATION", info[n].key);
327330
OBJ_RELEASE(iptr);
328331
continue;
329332
}
@@ -332,7 +335,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id,
332335
}
333336

334337
/* convert the array of prior results */
335-
if (NULL != results) {
338+
if (NULL != results && 0 < nresults) {
336339
for (n=0; n < nresults; n++) {
337340
iptr = OBJ_NEW(opal_value_t);
338341
iptr->key = strdup(results[n].key);

orte/orted/pmix/pmix_server_gen.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,17 @@ void pmix_server_notify(int status, orte_process_name_t* sender,
356356
}
357357
}
358358

359+
/* protect against infinite loops by marking that this notification was
360+
* passed down to the server by me */
361+
if (NULL == cd->info) {
362+
cd->info = OBJ_NEW(opal_list_t);
363+
}
364+
val = OBJ_NEW(opal_value_t);
365+
val->key = strdup("orte.notify.donotloop");
366+
val->type = OPAL_BOOL;
367+
val->data.flag = true;
368+
opal_list_append(cd->info, &val->super);
369+
359370
opal_output_verbose(2, orte_pmix_server_globals.output,
360371
"%s NOTIFYING PMIX SERVER OF STATUS %d",
361372
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret);
@@ -382,6 +393,14 @@ int pmix_server_notify_event(int code, opal_process_name_t *source,
382393
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
383394
ORTE_NAME_PRINT(source), code);
384395

396+
/* check to see if this is one we sent down */
397+
OPAL_LIST_FOREACH(val, info, opal_value_t) {
398+
if (0 == strcmp(val->key, "orte.notify.donotloop")) {
399+
/* yep - do not process */
400+
goto done;
401+
}
402+
}
403+
385404
/* a local process has generated an event - we need to xcast it
386405
* to all the daemons so it can be passed down to their local
387406
* procs */
@@ -448,6 +467,7 @@ int pmix_server_notify_event(int code, opal_process_name_t *source,
448467
/* maintain accounting */
449468
OBJ_RELEASE(sig);
450469

470+
done:
451471
/* execute the callback */
452472
if (NULL != cbfunc) {
453473
cbfunc(ORTE_SUCCESS, cbdata);

0 commit comments

Comments
 (0)