Skip to content

Commit 6c66871

Browse files
author
Ralph Castain
authored
Merge pull request #2826 from rhc54/topic/topo
Have rank=1 daemon always send its topology back as this is the most common use-case
2 parents d3a5065 + 2f4e87e commit 6c66871

File tree

2 files changed

+46
-20
lines changed

2 files changed

+46
-20
lines changed

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 40 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -991,6 +991,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
991991
opal_buffer_t *relay;
992992
char *sig;
993993
orte_topology_t *t;
994+
hwloc_topology_t topo;
994995
int i;
995996
bool found;
996997
orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_REPORT_TOPOLOGY_CMD;
@@ -1126,6 +1127,18 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
11261127
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
11271128
"%s RECEIVED TOPOLOGY SIG %s FROM NODE %s",
11281129
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sig, nodename));
1130+
1131+
/* rank=1 always sends its topology back */
1132+
topo = NULL;
1133+
if (1 == sender->vpid) {
1134+
idx=1;
1135+
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
1136+
ORTE_ERROR_LOG(rc);
1137+
orted_failed_launch = true;
1138+
goto CLEANUP;
1139+
}
1140+
}
1141+
11291142
/* do we already have this topology from some other node? */
11301143
found = false;
11311144
for (i=0; i < orte_node_topologies->size; i++) {
@@ -1139,6 +1152,9 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
11391152
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
11401153
found = true;
11411154
node->topology = t;
1155+
if (NULL != topo) {
1156+
hwloc_topology_destroy(topo);
1157+
}
11421158
free(sig);
11431159
break;
11441160
}
@@ -1152,27 +1168,31 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
11521168
t->sig = sig;
11531169
opal_pointer_array_add(orte_node_topologies, t);
11541170
node->topology = t;
1155-
/* construct the request */
1156-
relay = OBJ_NEW(opal_buffer_t);
1157-
if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &cmd, 1, ORTE_DAEMON_CMD))) {
1158-
ORTE_ERROR_LOG(rc);
1159-
OBJ_RELEASE(relay);
1160-
orted_failed_launch = true;
1161-
goto CLEANUP;
1162-
}
1163-
/* send it */
1164-
orte_rml.send_buffer_nb(orte_mgmt_conduit,
1165-
sender, relay,
1166-
ORTE_RML_TAG_DAEMON,
1167-
orte_rml_send_callback, NULL);
1168-
/* we will count this node as completed
1169-
* when we get the full topology back */
1170-
if (NULL != nodename) {
1171-
free(nodename);
1172-
nodename = NULL;
1171+
if (NULL != topo) {
1172+
t->topo = topo;
1173+
} else {
1174+
/* construct the request */
1175+
relay = OBJ_NEW(opal_buffer_t);
1176+
if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &cmd, 1, ORTE_DAEMON_CMD))) {
1177+
ORTE_ERROR_LOG(rc);
1178+
OBJ_RELEASE(relay);
1179+
orted_failed_launch = true;
1180+
goto CLEANUP;
1181+
}
1182+
/* send it */
1183+
orte_rml.send_buffer_nb(orte_mgmt_conduit,
1184+
sender, relay,
1185+
ORTE_RML_TAG_DAEMON,
1186+
orte_rml_send_callback, NULL);
1187+
/* we will count this node as completed
1188+
* when we get the full topology back */
1189+
if (NULL != nodename) {
1190+
free(nodename);
1191+
nodename = NULL;
1192+
}
1193+
idx = 1;
1194+
continue;
11731195
}
1174-
idx = 1;
1175-
continue;
11761196
}
11771197

11781198
CLEANUP:

orte/orted/orted_main.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -759,6 +759,12 @@ int orte_daemon(int argc, char *argv[])
759759
ORTE_ERROR_LOG(ret);
760760
}
761761

762+
/* if we are rank=1, then send our topology back - otherwise, mpirun
763+
* will request it if necessary */
764+
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) {
765+
ORTE_ERROR_LOG(ret);
766+
}
767+
762768
/* send to the HNP's callback - will be routed if routes are available */
763769
if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
764770
ORTE_PROC_MY_HNP, buffer,

0 commit comments

Comments
 (0)