Skip to content

Commit 1609d1d

Browse files
author
rhc54
committed
Merge pull request open-mpi#1001 from plesn/pmi2_singloton
pmi2 disqualified in singleton startup
2 parents a1b50ac + 043051f commit 1609d1d

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

opal/mca/common/pmi/common_pmi.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@ bool mca_common_pmi_init (void) {
4040
{
4141
int spawned, size, rank, appnum;
4242
int rc;
43+
char buf[PMI2_MAX_VALLEN];
4344

4445
size = -1;
4546
rank = -1;
4647
appnum = -1;
4748

48-
4949
/* if we can't startup PMI, we can't be used */
5050
if (PMI2_Initialized ()) {
5151
return true;
@@ -56,11 +56,16 @@ bool mca_common_pmi_init (void) {
5656
mca_common_pmi_init_count--;
5757
return false;
5858
}
59-
if (size < 0 || rank < 0 ) {
60-
opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
59+
/* depending on slurm versions, we may get bad rank/size or bad jobid */
60+
if (size < 0 || rank < 0 || PMI2_SUCCESS != PMI2_Job_GetId(buf, PMI2_MAX_VALLEN)) {
61+
/* When not launched via srun (singleton), fail quietly */
62+
if (NULL != getenv("SLURM_STEP_NUM_TASKS")) {
63+
opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
64+
}
6165
mca_common_pmi_init_count--;
6266
return false;
6367
}
68+
6469
mca_common_pmi_init_size = size;
6570
mca_common_pmi_init_rank = rank;
6671
mca_common_pmi_init_count--;

opal/mca/common/pmi/help-common-pmi.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ We cannot use PMI2 at this time, and your job will
1313
likely abort.
1414
#
1515
[pmi2-init-returned-bad-values]
16-
PMI2 initialized but returned bad values for size and rank.
16+
PMI2 initialized but returned bad values for size/rank/jobid.
1717
This is symptomatic of either a failure to use the
1818
"--mpi=pmi2" flag in SLURM, or a borked PMI2 installation.
1919
If running under SLURM, try adding "--mpi=pmi2" to your

0 commit comments

Comments
 (0)