1111 * All rights reserved.
1212 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
1313 * Copyright (c) 2014 Intel, Inc. All rights reserved
14+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
1415 * $COPYRIGHT$
1516 *
1617 * Additional copyrights may follow
3839#include "orte/util/show_help.h"
3940
4041#include "orte/mca/ras/base/ras_private.h"
42+ #include "orte/mca/ras/base/base.h"
4143#include "ras_lsf.h"
4244
4345
@@ -98,6 +100,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
98100 if (NULL != node && 0 == strcmp (nodelist [i ], node -> name )) {
99101 /* it is a repeat - just bump the slot count */
100102 ++ node -> slots ;
103+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
104+ "ras/lsf: +++ Node (%s) [slots=%d]" , node -> name , node -> slots );
101105 continue ;
102106 }
103107
@@ -107,7 +111,11 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
107111 node -> slots_inuse = 0 ;
108112 node -> slots_max = 0 ;
109113 node -> slots = 1 ;
114+ node -> state = ORTE_NODE_STATE_UP ;
110115 opal_list_append (nodes , & node -> super );
116+
117+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
118+ "ras/lsf: New Node (%s) [slots=%d]" , node -> name , node -> slots );
111119 }
112120
113121 /* release the nodelist from lsf */
@@ -141,14 +149,20 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
141149 if (!OPAL_BINDING_POLICY_IS_SET (opal_hwloc_binding_policy )) {
142150 OPAL_SET_BINDING_POLICY (opal_hwloc_binding_policy , OPAL_BIND_TO_HWTHREAD );
143151 }
144- /* get the apps and set the hostfile attribute in each to point to
145- * the hostfile */
146- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
147- if (NULL == (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
148- continue ;
149- }
150- orte_set_attribute (& app -> attributes , ORTE_APP_HOSTFILE , true, (void * )affinity_file , OPAL_STRING );
152+ /*
153+ * Do not set the hostfile attribute on each app_context since that
154+ * would confuse the sequential mapper when it tries to assign bindings
155+ * when running an MPMD job.
156+ * Instead just overwrite the orte_default_hostfile so it will be
157+ * general for all of the app_contexts.
158+ */
159+ if ( NULL != orte_default_hostfile ) {
160+ free (orte_default_hostfile );
161+ orte_default_hostfile = NULL ;
151162 }
163+ orte_default_hostfile = strdup (affinity_file );
164+ opal_output_verbose (10 , orte_ras_base_framework .framework_output ,
165+ "ras/lsf: Set default_hostfile to %s" ,orte_default_hostfile );
152166
153167 return ORTE_SUCCESS ;
154168 }
0 commit comments