Skip to content

Commit 9bb06d0

Browse files
authored
Merge pull request #7559 from rhc54/topic/fixes
Bunch of fixes plus PMIx/PRRTE updates
2 parents 48b5247 + 43f79be commit 9bb06d0

File tree

9 files changed

+53
-9
lines changed

9 files changed

+53
-9
lines changed

config/ompi_setup_prrte.m4

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
4646
[AC_HELP_STRING([--enable-prte-prefix-by-default],
4747
[Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])])
4848

49+
# An external PMIx older than v4 cannot be used with the internal PRRTE runtime - warn and abort unless the internal RTE was disabled
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"],
50+
[AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support])
51+
AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool])
52+
AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier])
53+
AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using])
54+
AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the])
55+
AC_MSG_WARN([--disable-internal-rte option.])
56+
AC_MSG_ERROR([Cannot continue])])
57+
4958
AC_MSG_CHECKING([if RTE support is enabled])
5059
if test "$enable_internal_rte" != "no"; then
5160
AC_MSG_RESULT([yes])
@@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
8190
opal_prrte_prefix_arg=
8291
fi
8392

84-
opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
93+
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
8594
AS_IF([test "$enable_debug" = "yes"],
8695
[opal_prrte_args="--enable-debug $opal_prrte_args"
8796
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],

config/opal_check_pmi.m4

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
125125
], [])],
126126
[AC_MSG_RESULT([found])
127127
opal_external_pmix_version=4x
128+
opal_numerical_pmix_version=4
128129
opal_external_pmix_version_found=1
129130
opal_external_pmix_happy=yes],
130131
[AC_MSG_RESULT([not found])])])
@@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
139140
], [])],
140141
[AC_MSG_RESULT([found])
141142
opal_external_pmix_version=3x
143+
opal_numerical_pmix_version=3
142144
opal_external_pmix_version_found=1
143145
opal_external_pmix_happy=yes],
144146
[AC_MSG_RESULT([not found])])])
@@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
153155
], [])],
154156
[AC_MSG_RESULT([found])
155157
opal_external_pmix_version=2x
158+
opal_numerical_pmix_version=2
156159
opal_external_pmix_version_found=1
157160
opal_external_pmix_happy=yes],
158161
[AC_MSG_RESULT([not found])])])
@@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
167170
], [])],
168171
[AC_MSG_RESULT([found])
169172
opal_external_pmix_version=1x
173+
opal_numerical_pmix_version=1
170174
opal_external_pmix_version_found=1
171175
opal_external_have_pmix1=1
172176
opal_external_pmix_happy=yes],
@@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
179183
opal_external_pmix_happy=no])
180184
181185
])
186+
# Reject an external PMIx whose detected major version is below 3 - warn and abort configure
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3],
187+
[AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3])
188+
AC_MSG_WARN([Please direct us to a more current PMIx release or])
189+
AC_MSG_WARN([use the internally provided one])
190+
AC_MSG_ERROR([Cannot continue])])
191+
182192
AS_IF([test "$opal_external_pmix_happy" = "yes"],
183193
[$3
184194
# add the new flags to our wrapper compilers

ompi/interlib/interlib.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version)
121121
PMIX_INFO_DESTRUCT(&info[3]);
122122
/* account for our refcount on pmix_init */
123123
PMIx_Finalize(NULL, 0);
124-
ret = opal_pmix_convert_status(rc);
124+
if (ompi_singleton && PMIX_ERR_UNREACH == rc) {
125+
ret = OMPI_SUCCESS;
126+
} else {
127+
ret = opal_pmix_convert_status(rc);
128+
}
125129
return ret;
126130
}

ompi/runtime/ompi_mpi_abort.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm,
8585

8686
procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t));
8787
if (NULL == procs) {
88-
/* quick clean orte and get out */
89-
ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs");
88+
/* quick clean RTE and get out */
89+
ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs");
9090
}
9191

9292
/* put all the local group procs in the abort list */

ompi/runtime/ompi_rte.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,27 @@
6161
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
6262
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
6363
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
64-
pmix_process_info_t pmix_process_info = {0};
64+
/* Global process info record. Every field is initialized explicitly:
 * my_name starts as the invalid jobid/vpid pair, all pointer fields
 * start as NULL, and all integer fields start as 0. */
pmix_process_info_t pmix_process_info = {
65+
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}, /* invalid name rather than an all-zero one */
66+
.nodename = NULL,
67+
.pid = 0,
68+
.top_session_dir = NULL,
69+
.job_session_dir = NULL,
70+
.proc_session_dir = NULL,
71+
.my_local_rank = 0,
72+
.my_node_rank = 0,
73+
.num_local_peers = 0,
74+
.num_procs = 0,
75+
.app_num = 0,
76+
.univ_size = 0,
77+
.app_sizes = NULL,
78+
.app_ldrs = NULL,
79+
.cpuset = NULL,
80+
.command = NULL,
81+
.num_apps = 0,
82+
.initial_wdir = NULL,
83+
.reincarnation = 0
84+
};
6585
bool pmix_proc_is_bound = false;
6686
bool ompi_singleton = false;
6787

opal/mca/rcache/grdma/rcache_grdma_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ static inline mca_rcache_base_registration_t *mca_rcache_grdma_remove_lru_head(m
187187
/* registration has been selected for removal and is no longer in the LRU. mark it
188188
* as such. */
189189
new_flags = (old_flags & ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU) | MCA_RCACHE_FLAGS_INVALID;
190-
if (opal_atomic_compare_exchange_strong_32(&old_reg->flags, &old_flags, new_flags)) {
190+
if (opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)&old_reg->flags, &old_flags, new_flags)) {
191191
break;
192192
}
193193
} while (1);

opal/runtime/opal_init.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "opal/mca/installdirs/base/base.h"
5555
#include "opal/mca/memory/base/base.h"
5656
#include "opal/mca/patcher/base/base.h"
57+
#include "opal/mca/pmix/base/base.h"
5758
#include "opal/mca/memcpy/base/base.h"
5859
#include "opal/mca/hwloc/base/base.h"
5960
#include "opal/mca/reachable/base/base.h"
@@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv)
630631
static mca_base_framework_t *opal_init_frameworks[] = {
631632
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
632633
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
633-
&opal_shmem_base_framework, &opal_reachable_base_framework,
634+
&opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework,
634635
NULL,
635636
};
636637

0 commit comments

Comments
 (0)