Skip to content

Commit 4e9364b

Browse files
author
Ralph Castain
authored
Merge pull request #2794 from rhc54/topic/regs
Next step in reducing launch time
2 parents 682f511 + 86ab751 commit 4e9364b

File tree

9 files changed

+493
-205
lines changed

9 files changed

+493
-205
lines changed

orte/mca/grpcomm/direct/grpcomm_direct.c

Lines changed: 7 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -374,33 +374,16 @@ static void xcast_recv(int status, orte_process_name_t* sender,
374374
orte_orteds_term_ordered = true;
375375
} else if (ORTE_DAEMON_ADD_LOCAL_PROCS == command ||
376376
ORTE_DAEMON_DVM_NIDMAP_CMD == command) {
377-
/* extract the byte object holding the daemonmap */
378-
cnt=1;
379-
if (ORTE_SUCCESS != (ret = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) {
380-
ORTE_ERROR_LOG(ret);
381-
goto relay;
382-
}
383-
384377
/* update our local nidmap, if required - the decode function
385-
* knows what to do - it will also free the bytes in the byte object
378+
* knows what to do
386379
*/
387-
if (ORTE_PROC_IS_HNP) {
388-
/* no need - already have the info */
389-
if (NULL != bo) {
390-
if (NULL != bo->bytes) {
391-
free(bo->bytes);
392-
}
393-
free(bo);
394-
}
395-
} else {
396-
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
397-
"%s grpcomm:direct:xcast updating daemon nidmap",
398-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
380+
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
381+
"%s grpcomm:direct:xcast updating daemon nidmap",
382+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
399383

400-
if (ORTE_SUCCESS != (ret = orte_util_decode_daemon_nodemap(bo))) {
401-
ORTE_ERROR_LOG(ret);
402-
goto relay;
403-
}
384+
if (ORTE_SUCCESS != (ret = orte_util_decode_daemon_nodemap(data))) {
385+
ORTE_ERROR_LOG(ret);
386+
goto relay;
404387
}
405388

406389
/* update the routing plan */

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@
7777
#include "orte/util/session_dir.h"
7878
#include "orte/util/proc_info.h"
7979
#include "orte/util/nidmap.h"
80-
#include "orte/util/regex.h"
8180
#include "orte/util/show_help.h"
8281
#include "orte/runtime/orte_globals.h"
8382
#include "orte/runtime/orte_wait.h"
@@ -138,21 +137,12 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
138137
return rc;
139138
}
140139

141-
/* construct a nodemap - only want updated items */
142-
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, true))) {
140+
/* construct a nodemap of the daemons */
141+
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buffer))) {
143142
ORTE_ERROR_LOG(rc);
144143
return rc;
145144
}
146145

147-
/* store it */
148-
boptr = &bo;
149-
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
150-
ORTE_ERROR_LOG(rc);
151-
return rc;
152-
}
153-
/* release the data since it has now been copied into our buffer */
154-
free(bo.bytes);
155-
156146
/* if we are not using static ports, we need to send the wireup info */
157147
if (!orte_static_ports) {
158148
/* pack a flag indicating wiring info is provided */

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 6 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1059,41 +1059,12 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
10591059
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
10601060
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
10611061
ORTE_NAME_PRINT(&dname), nodename));
1062-
for (idx=0; idx < orte_node_pool->size; idx++) {
1063-
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) {
1064-
continue;
1065-
}
1066-
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_LOC_VERIFIED)) {
1067-
/* already assigned */
1068-
continue;
1069-
}
1070-
if (0 == strcmp(nodename, node->name)) {
1071-
/* flag that we verified the location */
1072-
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_LOC_VERIFIED);
1073-
if (node == daemon->node) {
1074-
/* it wound up right where it should */
1075-
break;
1076-
}
1077-
/* remove the prior association */
1078-
if (NULL != daemon->node) {
1079-
OBJ_RELEASE(daemon->node);
1080-
}
1081-
if (NULL != node->daemon) {
1082-
OBJ_RELEASE(node->daemon);
1083-
}
1084-
/* associate this daemon with the node */
1085-
node->daemon = daemon;
1086-
OBJ_RETAIN(daemon);
1087-
/* associate this node with the daemon */
1088-
daemon->node = node;
1089-
OBJ_RETAIN(node);
1090-
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
1091-
"%s plm:base:orted_report_launch assigning daemon %s to node %s",
1092-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
1093-
ORTE_NAME_PRINT(&daemon->name), node->name));
1094-
break;
1095-
}
1096-
}
1062+
/* to "relocate" the daemon, we just update the name of
1063+
* the node object pointed to by this daemon */
1064+
free(daemon->node->name);
1065+
daemon->node->name = strdup(nodename);
1066+
/* mark that it was verified */
1067+
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_LOC_VERIFIED);
10971068
}
10981069

10991070
node = daemon->node;

orte/mca/plm/rsh/plm_rsh_module.c

Lines changed: 4 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
* reserved.
1515
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
1616
* Copyright (c) 2011 IBM Corporation. All rights reserved.
17-
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1818
* Copyright (c) 2015-2017 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
2020
* $COPYRIGHT$
@@ -780,7 +780,6 @@ static int remote_spawn(opal_buffer_t *launch)
780780
int rc=ORTE_SUCCESS;
781781
bool failed_launch = true;
782782
orte_std_cntr_t n;
783-
opal_byte_object_t *bo;
784783
orte_process_name_t target;
785784
orte_plm_rsh_caddy_t *caddy;
786785
orte_job_t *daemons;
@@ -802,23 +801,8 @@ static int remote_spawn(opal_buffer_t *launch)
802801
goto cleanup;
803802
}
804803

805-
/* extract the byte object holding the nidmap */
806-
n=1;
807-
if (ORTE_SUCCESS != (rc = opal_dss.unpack(launch, &bo, &n, OPAL_BYTE_OBJECT))) {
808-
ORTE_ERROR_LOG(rc);
809-
goto cleanup;
810-
}
811-
/* update our nidmap - this will free data in the byte object */
812-
if (ORTE_SUCCESS != (rc = orte_util_decode_daemon_nodemap(bo))) {
813-
ORTE_ERROR_LOG(rc);
814-
goto cleanup;
815-
}
816-
817-
/* ensure the routing plan is updated */
818-
rtmod = orte_rml.get_routed(orte_coll_conduit);
819-
orte_routed.update_routing_plan(rtmod);
820-
821804
/* get the updated routing list */
805+
rtmod = orte_rml.get_routed(orte_coll_conduit);
822806
OBJ_CONSTRUCT(&coll, opal_list_t);
823807
orte_routed.get_routing_list(rtmod, &coll);
824808

@@ -1124,7 +1108,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
11241108
/* if we are tree launching, find our children and create the launch cmd */
11251109
if (!mca_plm_rsh_component.no_tree_spawn) {
11261110
orte_daemon_cmd_flag_t command = ORTE_DAEMON_TREE_SPAWN;
1127-
opal_byte_object_t bo, *boptr;
11281111
orte_job_t *jdatorted;
11291112

11301113
/* get the tree spawn buffer */
@@ -1142,21 +1125,12 @@ static void launch_daemons(int fd, short args, void *cbdata)
11421125
goto cleanup;
11431126
}
11441127
/* construct a nodemap of all daemons we know about */
1145-
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, false))) {
1128+
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(orte_tree_launch_cmd))) {
11461129
ORTE_ERROR_LOG(rc);
11471130
OBJ_RELEASE(orte_tree_launch_cmd);
11481131
goto cleanup;
11491132
}
1150-
/* store it */
1151-
boptr = &bo;
1152-
if (ORTE_SUCCESS != (rc = opal_dss.pack(orte_tree_launch_cmd, &boptr, 1, OPAL_BYTE_OBJECT))) {
1153-
ORTE_ERROR_LOG(rc);
1154-
OBJ_RELEASE(orte_tree_launch_cmd);
1155-
free(bo.bytes);
1156-
goto cleanup;
1157-
}
1158-
/* release the data since it has now been copied into our buffer */
1159-
free(bo.bytes);
1133+
11601134
/* get the orted job data object */
11611135
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
11621136
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);

orte/mca/state/dvm/state_dvm.c

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
2+
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -260,22 +260,12 @@ static void vm_ready(int fd, short args, void *cbdata)
260260
return;
261261
}
262262
/* construct a nodemap with everything in it */
263-
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo, false))) {
263+
if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) {
264264
ORTE_ERROR_LOG(rc);
265265
OBJ_RELEASE(buf);
266266
return;
267267
}
268268

269-
/* store it */
270-
boptr = &bo;
271-
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) {
272-
ORTE_ERROR_LOG(rc);
273-
OBJ_RELEASE(buf);
274-
return;
275-
}
276-
/* release the data since it has now been copied into our buffer */
277-
free(bo.bytes);
278-
279269
/* pack a flag indicating wiring info is provided */
280270
flag = 1;
281271
opal_dss.pack(buf, &flag, 1, OPAL_INT8);

0 commit comments

Comments (0)