1111 * All rights reserved.
1212 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
1313 * Copyright (c) 2014 Intel, Inc. All rights reserved
14+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
1415 * $COPYRIGHT$
1516 *
1617 * Additional copyrights may follow
3839#include "orte/util/show_help.h"
3940
4041#include "orte/mca/ras/base/ras_private.h"
42+ #include "orte/mca/ras/base/base.h"
4143#include "ras_lsf.h"
4244
4345
@@ -98,6 +100,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
98100 if (NULL != node && 0 == strcmp (nodelist [i ], node -> name )) {
99101 /* it is a repeat - just bump the slot count */
100102 ++ node -> slots ;
103+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
104+ "ras/lsf: +++ Node (%s) [slots=%d]" , node -> name , node -> slots );
101105 continue ;
102106 }
103107
@@ -109,6 +113,9 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
109113 node -> slots = 1 ;
110114 node -> state = ORTE_NODE_STATE_UP ;
111115 opal_list_append (nodes , & node -> super );
116+
117+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
118+ "ras/lsf: New Node (%s) [slots=%d]" , node -> name , node -> slots );
112119 }
113120
114121 /* release the nodelist from lsf */
@@ -142,14 +149,20 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
142149 if (!OPAL_BINDING_POLICY_IS_SET (opal_hwloc_binding_policy )) {
143150 OPAL_SET_BINDING_POLICY (opal_hwloc_binding_policy , OPAL_BIND_TO_HWTHREAD );
144151 }
145- /* get the apps and set the hostfile attribute in each to point to
146- * the hostfile */
147- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
148- if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
149- continue ;
150- }
151- orte_set_attribute (& app -> attributes , ORTE_APP_HOSTFILE , true, (void * )affinity_file , OPAL_STRING );
152+ /*
153+ * Do not set the hostfile attribute on each app_context since that
154+ * would confuse the sequential mapper when it tries to assign bindings
155+ * when running an MPMD job.
156+ * Instead just overwrite the orte_default_hostfile so it will be
157+ * general for all of the app_contexts.
158+ */
159+ if ( NULL != orte_default_hostfile ) {
160+ free (orte_default_hostfile );
161+ orte_default_hostfile = NULL ;
152162 }
163+ orte_default_hostfile = strdup (affinity_file );
164+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
165+ "ras/lsf: Set default_hostfile to %s" ,orte_default_hostfile );
153166
154167 return ORTE_SUCCESS ;
155168 }
0 commit comments