Skip to content

Commit 43a8e77

Browse files
authored
Merge pull request #5309 from rhc54/cmr31/probe
Try to debug the MTT failures
2 parents 2ed1a69 + 4ced01c commit 43a8e77

File tree

2 files changed

+184
-6
lines changed

2 files changed

+184
-6
lines changed

opal/mca/pmix/pmix2x/pmix2x.c

Lines changed: 180 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -949,9 +949,9 @@ int pmix2x_value_unload(opal_value_t *kv,
949949
kv->type = OPAL_STATUS;
950950
kv->data.status = pmix2x_convert_rc(v->data.status);
951951
break;
952-
case PMIX_PROC_RANK:
953-
kv->type = OPAL_VPID;
954-
kv->data.name.vpid = pmix2x_convert_rank(v->data.rank);
952+
case PMIX_VALUE:
953+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
954+
rc = OPAL_ERR_NOT_SUPPORTED;
955955
break;
956956
case PMIX_PROC:
957957
kv->type = OPAL_NAME;
@@ -971,6 +971,18 @@ int pmix2x_value_unload(opal_value_t *kv,
971971
}
972972
kv->data.name.vpid = pmix2x_convert_rank(v->data.proc->rank);
973973
break;
974+
case PMIX_INFO:
975+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
976+
rc = OPAL_ERR_NOT_SUPPORTED;
977+
break;
978+
case PMIX_PDATA:
979+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
980+
rc = OPAL_ERR_NOT_SUPPORTED;
981+
break;
982+
case PMIX_BUFFER:
983+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
984+
rc = OPAL_ERR_NOT_SUPPORTED;
985+
break;
974986
case PMIX_BYTE_OBJECT:
975987
kv->type = OPAL_BYTE_OBJECT;
976988
if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) {
@@ -982,10 +994,22 @@ int pmix2x_value_unload(opal_value_t *kv,
982994
kv->data.bo.size = 0;
983995
}
984996
break;
997+
case PMIX_KVAL:
998+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
999+
rc = OPAL_ERR_NOT_SUPPORTED;
1000+
break;
1001+
case PMIX_MODEX:
1002+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1003+
rc = OPAL_ERR_NOT_SUPPORTED;
1004+
break;
9851005
case PMIX_PERSIST:
9861006
kv->type = OPAL_PERSIST;
9871007
kv->data.uint8 = pmix2x_convert_persist(v->data.persist);
9881008
break;
1009+
case PMIX_POINTER:
1010+
kv->type = OPAL_PTR;
1011+
kv->data.ptr = v->data.ptr;
1012+
break;
9891013
case PMIX_SCOPE:
9901014
kv->type = OPAL_SCOPE;
9911015
kv->data.uint8 = pmix2x_convert_scope(v->data.scope);
@@ -994,15 +1018,54 @@ int pmix2x_value_unload(opal_value_t *kv,
9941018
kv->type = OPAL_DATA_RANGE;
9951019
kv->data.uint8 = pmix2x_convert_range(v->data.range);
9961020
break;
1021+
case PMIX_COMMAND:
1022+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1023+
rc = OPAL_ERR_NOT_SUPPORTED;
1024+
break;
1025+
case PMIX_INFO_DIRECTIVES:
1026+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1027+
rc = OPAL_ERR_NOT_SUPPORTED;
1028+
break;
1029+
case PMIX_DATA_TYPE:
1030+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1031+
rc = OPAL_ERR_NOT_SUPPORTED;
1032+
break;
9971033
case PMIX_PROC_STATE:
9981034
kv->type = OPAL_PROC_STATE;
9991035
/* the OPAL layer doesn't have any concept of proc state,
10001036
* so the ORTE layer is responsible for converting it */
10011037
memcpy(&kv->data.uint8, &v->data.state, sizeof(uint8_t));
10021038
break;
1003-
case PMIX_POINTER:
1004-
kv->type = OPAL_PTR;
1005-
kv->data.ptr = v->data.ptr;
1039+
case PMIX_PROC_INFO:
1040+
kv->type = OPAL_PROC_INFO;
1041+
if (NULL == v->data.pinfo) {
1042+
rc = OPAL_ERR_BAD_PARAM;
1043+
break;
1044+
}
1045+
/* see if this job is in our list of known nspaces */
1046+
found = false;
1047+
OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) {
1048+
if (0 == strncmp(job->nspace, v->data.pinfo->proc.nspace, PMIX_MAX_NSLEN)) {
1049+
kv->data.pinfo.name.jobid = job->jobid;
1050+
found = true;
1051+
break;
1052+
}
1053+
}
1054+
if (!found) {
1055+
if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&kv->data.pinfo.name.jobid, v->data.pinfo->proc.nspace))) {
1056+
return pmix2x_convert_opalrc(rc);
1057+
}
1058+
}
1059+
kv->data.pinfo.name.vpid = pmix2x_convert_rank(v->data.pinfo->proc.rank);
1060+
if (NULL != v->data.pinfo->hostname) {
1061+
kv->data.pinfo.hostname = strdup(v->data.pinfo->hostname);
1062+
}
1063+
if (NULL != v->data.pinfo->executable_name) {
1064+
kv->data.pinfo.executable_name = strdup(v->data.pinfo->executable_name);
1065+
}
1066+
kv->data.pinfo.pid = v->data.pinfo->pid;
1067+
kv->data.pinfo.exit_code = v->data.pinfo->exit_code;
1068+
kv->data.pinfo.state = pmix2x_convert_state(v->data.pinfo->state);
10061069
break;
10071070
case PMIX_DATA_ARRAY:
10081071
if (NULL == v->data.darray || NULL == v->data.darray->array) {
@@ -1029,8 +1092,30 @@ int pmix2x_value_unload(opal_value_t *kv,
10291092
}
10301093
}
10311094
break;
1095+
case PMIX_PROC_RANK:
1096+
kv->type = OPAL_VPID;
1097+
kv->data.name.vpid = pmix2x_convert_rank(v->data.rank);
1098+
break;
1099+
case PMIX_QUERY:
1100+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1101+
rc = OPAL_ERR_NOT_SUPPORTED;
1102+
break;
1103+
case PMIX_COMPRESSED_STRING:
1104+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1105+
rc = OPAL_ERR_NOT_SUPPORTED;
1106+
break;
1107+
case PMIX_ALLOC_DIRECTIVE:
1108+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1109+
rc = OPAL_ERR_NOT_SUPPORTED;
1110+
break;
1111+
case PMIX_INFO_ARRAY:
1112+
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
1113+
rc = OPAL_ERR_NOT_SUPPORTED;
1114+
break;
1115+
10321116
default:
10331117
/* silence warnings */
1118+
opal_output(0, "VALUE UNLOAD NOT SUPPORTED FOR TYPE %d", v->type);
10341119
rc = OPAL_ERROR;
10351120
break;
10361121
}
@@ -1418,6 +1503,95 @@ opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir)
14181503
}
14191504
}
14201505

1506+
/**
 * Translate a PMIx process state into the integer state code stored on
 * the OPAL side of this component.
 *
 * NOTE(review): the numeric codes presumably mirror the ORTE proc state
 * values, since OPAL has no proc state definitions of its own -- confirm
 * against the ORTE headers before changing any number below.
 *
 * @param state  PMIx process state to translate
 * @return the integer code paired with @p state in the table, or 0
 *         (undefined) when @p state has no entry
 */
int pmix2x_convert_state(pmix_proc_state_t state)
{
    /* one row per recognized PMIx state; note that PREPPED and
     * LAUNCH_UNDERWAY both translate to code 1 */
    static const struct {
        pmix_proc_state_t pmix;
        int code;
    } xlate[] = {
        { PMIX_PROC_STATE_UNDEF,             0 },
        { PMIX_PROC_STATE_PREPPED,           1 },
        { PMIX_PROC_STATE_LAUNCH_UNDERWAY,   1 },
        { PMIX_PROC_STATE_RESTART,           2 },
        { PMIX_PROC_STATE_TERMINATE,         3 },
        { PMIX_PROC_STATE_RUNNING,           4 },
        { PMIX_PROC_STATE_CONNECTED,         5 },
        { PMIX_PROC_STATE_UNTERMINATED,     15 },
        { PMIX_PROC_STATE_TERMINATED,       20 },
        { PMIX_PROC_STATE_KILLED_BY_CMD,    51 },
        { PMIX_PROC_STATE_ABORTED,          52 },
        { PMIX_PROC_STATE_FAILED_TO_START,  53 },
        { PMIX_PROC_STATE_ABORTED_BY_SIG,   54 },
        { PMIX_PROC_STATE_TERM_WO_SYNC,     55 },
        { PMIX_PROC_STATE_COMM_FAILED,      56 },
        { PMIX_PROC_STATE_CALLED_ABORT,     58 },
        { PMIX_PROC_STATE_MIGRATING,        60 },
        { PMIX_PROC_STATE_CANNOT_RESTART,   61 },
        { PMIX_PROC_STATE_TERM_NON_ZERO,    62 },
        { PMIX_PROC_STATE_FAILED_TO_LAUNCH, 63 }
    };
    size_t idx;

    for (idx = 0; idx < sizeof(xlate) / sizeof(xlate[0]); idx++) {
        if (xlate[idx].pmix == state) {
            return xlate[idx].code;
        }
    }

    /* anything we do not recognize is reported as undefined */
    return 0;
}
1552+
1553+
pmix_proc_state_t pmix2x_convert_opalstate(int state)
1554+
{
1555+
switch(state) {
1556+
case 0:
1557+
return PMIX_PROC_STATE_UNDEF;
1558+
case 1:
1559+
return PMIX_PROC_STATE_LAUNCH_UNDERWAY;
1560+
case 2:
1561+
return PMIX_PROC_STATE_RESTART;
1562+
case 3:
1563+
return PMIX_PROC_STATE_TERMINATE;
1564+
case 4:
1565+
return PMIX_PROC_STATE_RUNNING;
1566+
case 5:
1567+
return PMIX_PROC_STATE_CONNECTED;
1568+
case 51:
1569+
return PMIX_PROC_STATE_KILLED_BY_CMD;
1570+
case 52:
1571+
return PMIX_PROC_STATE_ABORTED;
1572+
case 53:
1573+
return PMIX_PROC_STATE_FAILED_TO_START;
1574+
case 54:
1575+
return PMIX_PROC_STATE_ABORTED_BY_SIG;
1576+
case 55:
1577+
return PMIX_PROC_STATE_TERM_WO_SYNC;
1578+
case 56:
1579+
return PMIX_PROC_STATE_COMM_FAILED;
1580+
case 58:
1581+
return PMIX_PROC_STATE_CALLED_ABORT;
1582+
case 59:
1583+
return PMIX_PROC_STATE_MIGRATING;
1584+
case 61:
1585+
return PMIX_PROC_STATE_CANNOT_RESTART;
1586+
case 62:
1587+
return PMIX_PROC_STATE_TERM_NON_ZERO;
1588+
case 63:
1589+
return PMIX_PROC_STATE_FAILED_TO_LAUNCH;
1590+
default:
1591+
return PMIX_PROC_STATE_UNDEF;
1592+
}
1593+
}
1594+
14211595
/**** INSTANTIATE INTERNAL CLASSES ****/
14221596
OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
14231597
opal_list_item_t,

opal/mca/pmix/pmix2x/pmix2x.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,10 @@ OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_al
339339

340340
OPAL_MODULE_DECLSPEC char* pmix2x_convert_jobid(opal_jobid_t jobid);
341341

342+
/* Map a PMIx process state to its integer state code; a state with no
 * mapping yields 0 (undefined). */
OPAL_MODULE_DECLSPEC int pmix2x_convert_state(pmix_proc_state_t state);
343+
344+
/* Map an integer process state code to a PMIx process state; an
 * unrecognized code yields PMIX_PROC_STATE_UNDEF. */
OPAL_MODULE_DECLSPEC pmix_proc_state_t pmix2x_convert_opalstate(int state);
345+
342346
END_C_DECLS
343347

344348
#endif /* MCA_PMIX_EXTERNAL_H */

0 commit comments

Comments
 (0)