1212 * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
1313 * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414 * All rights reserved.
15- * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
15+ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1616 * Copyright (c) 2016 Research Organization for Information Science
1717 * and Technology (RIST). All rights reserved.
1818 * $COPYRIGHT$
@@ -50,8 +50,9 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
5050{
5151 orte_job_t * jdata ;
5252 orte_job_map_t * map ;
53+ orte_node_t * node ;
5354 int rc , i ;
54- bool did_map ;
55+ bool did_map , given ;
5556 orte_rmaps_base_selected_module_t * mod ;
5657 orte_job_t * parent ;
5758 orte_state_caddy_t * caddy = (orte_state_caddy_t * )cbdata ;
@@ -71,6 +72,47 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
7172 "mca:rmaps: mapping job %s" ,
7273 ORTE_JOBID_PRINT (jdata -> jobid ));
7374
75+ /* compute the number of procs and check validity */
76+ nprocs = 0 ;
77+ for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
78+ if (NULL != (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
79+ if (0 == app -> num_procs ) {
80+ opal_list_t nodes ;
81+ orte_std_cntr_t slots ;
82+ OBJ_CONSTRUCT (& nodes , opal_list_t );
83+ orte_rmaps_base_get_target_nodes (& nodes , & slots , app , ORTE_MAPPING_BYNODE , true, true);
84+ /* if we are in a managed allocation, then all is good - otherwise,
85+ * we have to do a little more checking */
86+ if (!orte_managed_allocation ) {
87+ /* if all the nodes have their slots given, then we are okay */
88+ given = true;
89+ OPAL_LIST_FOREACH (node , & nodes , orte_node_t ) {
90+ if (!ORTE_FLAG_TEST (node , ORTE_NODE_FLAG_SLOTS_GIVEN )) {
91+ given = false;
92+ break ;
93+ }
94+ }
95+ /* if -host or -hostfile was given, but not every node had its slots
96+ * given, then leaving the number of procs unspecified is not allowed */
97+ if (!given &&
98+ (orte_get_attribute (& app -> attributes , ORTE_APP_DASH_HOST , NULL , OPAL_STRING ) ||
99+ orte_get_attribute (& app -> attributes , ORTE_APP_HOSTFILE , NULL , OPAL_STRING ))) {
100+ /* inform the user of the error */
101+ orte_show_help ("help-orte-rmaps-base.txt" , "num-procs-not-specified" , true);
102+ ORTE_ACTIVATE_JOB_STATE (jdata , ORTE_JOB_STATE_MAP_FAILED );
103+ OBJ_RELEASE (caddy );
104+ OPAL_LIST_DESTRUCT (& nodes );
105+ return ;
106+ }
107+ }
108+ OPAL_LIST_DESTRUCT (& nodes );
109+ nprocs += slots ;
110+ } else {
111+ nprocs += app -> num_procs ;
112+ }
113+ }
114+ }
115+
74116 /* NOTE: CHECK FOR JDATA->MAP == NULL. IF IT IS, THEN USE
75117 * THE VALUES THAT WERE READ BY THE LOCAL MCA PARAMS. THE
76118 * PLM PROXY WILL SEND A JOB-OBJECT THAT WILL INCLUDE ANY
@@ -91,22 +133,6 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
91133 OBJ_RELEASE (caddy );
92134 return ;
93135 }
94- /* compute the number of procs */
95- nprocs = 0 ;
96- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
97- if (NULL != (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
98- if (0 == app -> num_procs ) {
99- opal_list_t nodes ;
100- orte_std_cntr_t slots ;
101- OBJ_CONSTRUCT (& nodes , opal_list_t );
102- orte_rmaps_base_get_target_nodes (& nodes , & slots , app , ORTE_MAPPING_BYNODE , true, true);
103- OPAL_LIST_DESTRUCT (& nodes );
104- nprocs += slots ;
105- } else {
106- nprocs += app -> num_procs ;
107- }
108- }
109- }
110136 opal_output_verbose (5 , orte_rmaps_base_framework .framework_output ,
111137 "mca:rmaps: nprocs %s" ,
112138 ORTE_VPID_PRINT (nprocs ));
@@ -142,12 +168,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
142168 }
143169 /* check for oversubscribe directives */
144170 if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping ))) {
145- if (orte_managed_allocation ) {
146- /* by default, we do not allow oversubscription in managed environments */
147- ORTE_SET_MAPPING_DIRECTIVE (map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
148- } else {
149- ORTE_UNSET_MAPPING_DIRECTIVE (map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
150- }
171+ ORTE_SET_MAPPING_DIRECTIVE (map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
151172 } else {
152173 /* pass along the directive */
153174 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping )) {
@@ -179,13 +200,6 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
179200 if (!jdata -> map -> display_map ) {
180201 jdata -> map -> display_map = orte_rmaps_base .display_map ;
181202 }
182- /* compute the number of procs */
183- nprocs = 0 ;
184- for (i = 0 ; i < jdata -> apps -> size ; i ++ ) {
185- if (NULL != (app = (orte_app_context_t * )opal_pointer_array_get_item (jdata -> apps , i ))) {
186- nprocs += app -> num_procs ;
187- }
188- }
189203 /* set the default mapping policy IFF it wasn't provided */
190204 if (!ORTE_MAPPING_POLICY_IS_SET (jdata -> map -> mapping )) {
191205 /* default based on number of procs */
@@ -215,12 +229,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
215229 }
216230 /* check for oversubscribe directives */
217231 if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping ))) {
218- if (orte_managed_allocation ) {
219- /* by default, we do not allow oversubscription in managed environments */
220- ORTE_SET_MAPPING_DIRECTIVE (jdata -> map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
221- } else {
222- ORTE_UNSET_MAPPING_DIRECTIVE (jdata -> map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
223- }
232+ ORTE_SET_MAPPING_DIRECTIVE (jdata -> map -> mapping , ORTE_MAPPING_NO_OVERSUBSCRIBE );
224233 } else {
225234 /* pass along the directive */
226235 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE (orte_rmaps_base .mapping )) {
0 commit comments