Skip to content

Commit b59ae14

Browse files
author
Ralph Castain
committed
Fix static port and partial allocation operations
Fix static port wireup by recording the TCP port mpirun is using and correctly passing the regex of hosts to the daemons. Do a better job of closing sockets on failed connection attempts. Correctly identify the remote host in the associated error message. Fix partial allocation operations by not attempting to set #slots on nodes that were not used, and thus don't have a daemon or topology assigned to them. Signed-off-by: Ralph Castain <[email protected]>
1 parent 06ef1aa commit b59ae14

File tree

14 files changed

+142
-68
lines changed

14 files changed

+142
-68
lines changed

orte/mca/errmgr/default_orted/errmgr_default_orted.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* reserved.
99
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1010
* All rights reserved.
11-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
11+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1212
* $COPYRIGHT$
1313
*
1414
* Additional copyrights may follow
@@ -245,9 +245,12 @@ static void proc_errors(int fd, short args, void *cbdata)
245245
* lifeline
246246
*/
247247
if (ORTE_PROC_STATE_LIFELINE_LOST == state ||
248-
ORTE_PROC_STATE_UNABLE_TO_SEND_MSG == state) {
248+
ORTE_PROC_STATE_UNABLE_TO_SEND_MSG == state ||
249+
ORTE_PROC_STATE_NO_PATH_TO_TARGET == state ||
250+
ORTE_PROC_STATE_PEER_UNKNOWN == state ||
251+
ORTE_PROC_STATE_FAILED_TO_CONNECT == state) {
249252
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
250-
"%s errmgr:orted lifeline lost - exiting",
253+
"%s errmgr:orted lifeline lost or unable to communicate - exiting",
251254
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
252255
/* set our exit status */
253256
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);

orte/mca/oob/base/oob_base_stubs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
4646
OBJ_RELEASE(cd);
4747

4848
opal_output_verbose(5, orte_oob_base_framework.framework_output,
49-
"%s oob:base:send to target %s - %u attempt",
49+
"%s oob:base:send to target %s - attempt %u",
5050
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
5151
ORTE_NAME_PRINT(&msg->dst), msg->retries);
5252

orte/mca/oob/tcp/oob_tcp_connection.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
273273
if (mca_oob_tcp_component.max_recon_attempts < 0 ||
274274
peer->num_retries < mca_oob_tcp_component.max_recon_attempts) {
275275
struct timeval tv;
276+
/* close the current socket */
277+
CLOSE_THE_SOCKET(peer->sd);
276278
/* reset the addr states */
277279
OPAL_LIST_FOREACH(addr, &peer->addrs, mca_oob_tcp_addr_t) {
278280
addr->state = MCA_OOB_TCP_UNCONNECTED;
@@ -306,6 +308,8 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
306308
"------------------------------------------------------------",
307309
orte_process_info.nodename,
308310
(NULL == host) ? "<unknown>" : host);
311+
/* close the socket */
312+
CLOSE_THE_SOCKET(peer->sd);
309313
/* let the TCP component know that this module failed to make
310314
* the connection so it can do some bookkeeping and fail back
311315
* to the OOB level so another component can try. This will activate
@@ -350,6 +354,8 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
350354
} else {
351355
peer->state = MCA_OOB_TCP_UNCONNECTED;
352356
}
357+
/* close the socket */
358+
CLOSE_THE_SOCKET(peer->sd);
353359
return;
354360
} else {
355361
opal_output(0,
@@ -361,6 +367,8 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
361367
opal_net_get_port((struct sockaddr*)&addr->addr),
362368
opal_strerror(rc),
363369
rc);
370+
/* close the socket */
371+
CLOSE_THE_SOCKET(peer->sd);
364372
ORTE_FORCED_TERMINATE(1);
365373
}
366374

orte/mca/oob/tcp/oob_tcp_listener.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ static int create_listen(void)
385385
conn = OBJ_NEW(mca_oob_tcp_listener_t);
386386
conn->sd = sd;
387387
conn->port = ntohs(((struct sockaddr_in*) &inaddr)->sin_port);
388+
if (orte_static_ports && 0 == orte_process_info.my_port) {
389+
/* save the first one */
390+
orte_process_info.my_port = conn->port;
391+
}
388392
opal_list_append(&mca_oob_tcp_component.listeners, &conn->item);
389393
/* and to our ports */
390394
asprintf(&tconn, "%d", ntohs(((struct sockaddr_in*) &inaddr)->sin_port));

orte/mca/plm/alps/plm_alps_module.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
16+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1717
* $COPYRIGHT$
1818
*
1919
* Additional copyrights may follow
@@ -350,6 +350,17 @@ static void launch_daemons(int fd, short args, void *cbdata)
350350
/* add the daemon command (as specified by user) */
351351
orte_plm_base_setup_orted_cmd(&argc, &argv);
352352

353+
/* if we have static ports, we need to ensure that mpirun is
354+
* on the list. Since alps won't be launching a daemon on it,
355+
* it won't have been placed on the list, so create a new
356+
* version here that includes it */
357+
if (orte_static_ports) {
358+
char *ltmp;
359+
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat);
360+
free(nodelist_flat);
361+
nodelist_flat = ltmp;
362+
}
363+
353364
/* Add basic orted command line options, including debug flags */
354365
orte_plm_base_orted_append_basic_args(&argc, &argv,
355366
NULL,

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,27 +84,35 @@
8484
void orte_plm_base_set_slots(orte_node_t *node)
8585
{
8686
if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) {
87-
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
88-
HWLOC_OBJ_CORE, 0,
89-
OPAL_HWLOC_LOGICAL);
87+
if (NULL != node->topology && NULL != node->topology->topo) {
88+
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
89+
HWLOC_OBJ_CORE, 0,
90+
OPAL_HWLOC_LOGICAL);
91+
}
9092
} else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) {
91-
if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
92-
HWLOC_OBJ_SOCKET, 0,
93-
OPAL_HWLOC_LOGICAL))) {
94-
/* some systems don't report sockets - in this case,
95-
* use numanodes */
93+
if (NULL != node->topology && NULL != node->topology->topo) {
94+
if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
95+
HWLOC_OBJ_SOCKET, 0,
96+
OPAL_HWLOC_LOGICAL))) {
97+
/* some systems don't report sockets - in this case,
98+
* use numanodes */
99+
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
100+
HWLOC_OBJ_NODE, 0,
101+
OPAL_HWLOC_LOGICAL);
102+
}
103+
}
104+
} else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) {
105+
if (NULL != node->topology && NULL != node->topology->topo) {
96106
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
97107
HWLOC_OBJ_NODE, 0,
98108
OPAL_HWLOC_LOGICAL);
99109
}
100-
} else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) {
101-
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
102-
HWLOC_OBJ_NODE, 0,
103-
OPAL_HWLOC_LOGICAL);
104110
} else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) {
105-
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
106-
HWLOC_OBJ_PU, 0,
107-
OPAL_HWLOC_LOGICAL);
111+
if (NULL != node->topology && NULL != node->topology->topo) {
112+
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
113+
HWLOC_OBJ_PU, 0,
114+
OPAL_HWLOC_LOGICAL);
115+
}
108116
} else {
109117
/* must be a number */
110118
node->slots = strtol(orte_set_slots, NULL, 10);
@@ -1436,16 +1444,23 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
14361444
free(rml_uri);
14371445

14381446
/* if we have static ports, pass the node list */
1439-
if (orte_static_ports && NULL != nodes) {
1440-
/* convert the nodes to a regex */
1441-
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
1442-
ORTE_ERROR_LOG(rc);
1443-
return rc;
1447+
if (orte_static_ports) {
1448+
param = NULL;
1449+
if (NULL != nodes) {
1450+
/* convert the nodes to a regex */
1451+
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
1452+
ORTE_ERROR_LOG(rc);
1453+
return rc;
1454+
}
1455+
} else if (NULL != orte_node_regex) {
1456+
param = strdup(orte_node_regex);
1457+
}
1458+
if (NULL != param) {
1459+
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1460+
opal_argv_append(argc, argv, "orte_node_regex");
1461+
opal_argv_append(argc, argv, param);
1462+
free(param);
14441463
}
1445-
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
1446-
opal_argv_append(argc, argv, "orte_node_regex");
1447-
opal_argv_append(argc, argv, param);
1448-
free(param);
14491464
}
14501465

14511466
/* if output-filename was specified, pass that along */

orte/mca/plm/lsf/plm_lsf_module.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* reserved.
1515
* Copyright (c) 2008 Institut National de Recherche en Informatique
1616
* et Automatique. All rights reserved.
17-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
17+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1818
* $COPYRIGHT$
1919
*
2020
* Additional copyrights may follow
@@ -258,6 +258,17 @@ static void launch_daemons(int fd, short args, void *cbdata)
258258
/* add the daemon command (as specified by user) */
259259
orte_plm_base_setup_orted_cmd(&argc, &argv);
260260

261+
/* if we have static ports, we need to ensure that mpirun is
262+
* on the list. Since lsf won't be launching a daemon on it,
263+
* it won't have been placed on the list, so create a new
264+
* version here that includes it */
265+
if (orte_static_ports) {
266+
char *ltmp;
267+
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist);
268+
free(nodelist);
269+
nodelist = ltmp;
270+
}
271+
261272
/* Add basic orted command line options */
262273
orte_plm_base_orted_append_basic_args(&argc, &argv,
263274
"lsf",

orte/mca/plm/rsh/plm_rsh_module.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ static void rsh_wait_daemon(orte_proc_t *daemon, void* cbdata)
328328
static int setup_launch(int *argcptr, char ***argvptr,
329329
char *nodename,
330330
int *node_name_index1,
331-
int *proc_vpid_index, char *prefix_dir)
331+
int *proc_vpid_index, char *prefix_dir,
332+
char *nodelist)
332333
{
333334
int argc;
334335
char **argv;
@@ -613,7 +614,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
613614
orte_plm_base_orted_append_basic_args(&argc, &argv,
614615
"env",
615616
proc_vpid_index,
616-
NULL);
617+
nodelist);
617618

618619
/* ensure that only the ssh plm is selected on the remote daemon */
619620
opal_argv_append_nosize(&argv, "-"OPAL_MCA_CMD_LINE_ID);
@@ -828,7 +829,7 @@ static int remote_spawn(opal_buffer_t *launch)
828829

829830
/* setup the launch */
830831
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1,
831-
&proc_vpid_index, prefix))) {
832+
&proc_vpid_index, prefix, NULL))) {
832833
ORTE_ERROR_LOG(rc);
833834
OBJ_DESTRUCT(&coll);
834835
goto cleanup;
@@ -993,6 +994,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
993994
int port, *portptr;
994995
orte_namelist_t *child;
995996
char *rtmod;
997+
char *nlistflat;
996998

997999
/* if we are launching debugger daemons, then just go
9981000
* do it - no new daemons will be launched
@@ -1153,12 +1155,37 @@ static void launch_daemons(int fd, short args, void *cbdata)
11531155
orte_routed.get_routing_list(rtmod, &coll);
11541156
}
11551157

1158+
if (orte_static_ports) {
1159+
/* create a list of all nodes involved so we can pass it along */
1160+
char **nodelist = NULL;
1161+
orte_node_t *n2;
1162+
for (nnode=0; nnode < map->nodes->size; nnode++) {
1163+
if (NULL != (n2 = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
1164+
opal_argv_append_nosize(&nodelist, n2->name);
1165+
}
1166+
}
1167+
/* we need mpirun to be the first node on this list */
1168+
if (0 != strcmp(nodelist[0], orte_process_info.nodename)) {
1169+
opal_argv_prepend_nosize(&nodelist, orte_process_info.nodename);
1170+
}
1171+
nlistflat = opal_argv_join(nodelist, ',');
1172+
opal_argv_free(nodelist);
1173+
} else {
1174+
nlistflat = NULL;
1175+
}
1176+
11561177
/* setup the launch */
11571178
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1,
1158-
&proc_vpid_index, prefix_dir))) {
1179+
&proc_vpid_index, prefix_dir, nlistflat))) {
11591180
ORTE_ERROR_LOG(rc);
1181+
if (NULL != nlistflat) {
1182+
free(nlistflat);
1183+
}
11601184
goto cleanup;
11611185
}
1186+
if (NULL != nlistflat) {
1187+
free(nlistflat);
1188+
}
11621189

11631190
/*
11641191
* Iterate through each of the nodes

orte/mca/plm/slurm/plm_slurm_module.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -354,6 +354,17 @@ static void launch_daemons(int fd, short args, void *cbdata)
354354
/* add the daemon command (as specified by user) */
355355
orte_plm_base_setup_orted_cmd(&argc, &argv);
356356

357+
/* if we have static ports, we need to ensure that mpirun is
358+
* on the list. Since slurm won't be launching a daemon on it,
359+
* it won't have been placed on the list, so create a new
360+
* version here that includes it */
361+
if (orte_static_ports) {
362+
char *ltmp;
363+
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat);
364+
free(nodelist_flat);
365+
nodelist_flat = ltmp;
366+
}
367+
357368
/* Add basic orted command line options, including debug flags */
358369
orte_plm_base_orted_append_basic_args(&argc, &argv,
359370
"slurm", &proc_vpid_index,

orte/mca/plm/tm/plm_tm_module.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -278,6 +278,17 @@ static void launch_daemons(int fd, short args, void *cbdata)
278278
nodelist = opal_argv_join(nodeargv, ',');
279279
opal_argv_free(nodeargv);
280280

281+
/* if we have static ports, we need to ensure that mpirun is
282+
* on the list. Since Torque won't be launching a daemon on it,
283+
* it won't have been placed on the list, so create a new
284+
* version here that includes it */
285+
if (orte_static_ports) {
286+
char *ltmp;
287+
asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist);
288+
free(nodelist);
289+
nodelist = ltmp;
290+
}
291+
281292
/* Add basic orted command line options */
282293
orte_plm_base_orted_append_basic_args(&argc, &argv, "tm",
283294
&proc_vpid_index,

0 commit comments

Comments
 (0)