Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 1dbc692

Browse files
committed
Merge pull request #1077 from rhc54/cmr2.0/oversub
Ensure that we exit with a non-zero status when oversubscribe fails
2 parents e9c8e08 + 159f6d0 commit 1dbc692

File tree

6 files changed

+110
-6
lines changed

6 files changed

+110
-6
lines changed

orte/mca/rmaps/mindist/rmaps_mindist_module.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2014 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -226,6 +226,7 @@ static int mindist_map(orte_job_t *jdata)
226226
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
227227
true, app->num_procs, app->app);
228228
rc = ORTE_ERR_SILENT;
229+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
229230
goto error;
230231
} else {
231232
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
@@ -329,6 +330,7 @@ static int mindist_map(orte_job_t *jdata)
329330
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
330331
true, app->num_procs, app->app);
331332
rc = ORTE_ERR_SILENT;
333+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
332334
goto error;
333335
}
334336
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,

orte/mca/rmaps/ppr/rmaps_ppr.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
33
* Copyright (c) 2011 Los Alamos National Security, LLC.
44
* All rights reserved.
5-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
5+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
6+
* Copyright (c) 2015 Research Organization for Information Science
7+
* and Technology (RIST). All rights reserved.
68
* $COPYRIGHT$
79
*
810
* Additional copyrights may follow
@@ -339,13 +341,32 @@ static int ppr_mapper(orte_job_t *jdata)
339341
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
340342
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
341343
true, node->num_procs, app->app);
344+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
342345
rc = ORTE_ERR_SILENT;
343346
goto error;
344347
}
345348
/* flag the node as oversubscribed so that sched-yield gets
346349
* properly set
347350
*/
348351
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
352+
/* check for permission */
353+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
354+
/* if we weren't given a directive either way, then we will error out
355+
* as the #slots were specifically given, either by the host RM or
356+
* via hostfile/dash-host */
357+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
358+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
359+
true, app->num_procs, app->app);
360+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
361+
return ORTE_ERR_SILENT;
362+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
363+
/* if we were explicitly told not to oversubscribe, then don't */
364+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
365+
true, app->num_procs, app->app);
366+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
367+
return ORTE_ERR_SILENT;
368+
}
369+
}
349370
}
350371

351372
/* if we haven't mapped all the procs, continue on to the

orte/mca/rmaps/rank_file/rmaps_rank_file.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* All rights reserved.
1515
* Copyright (c) 2008 Voltaire. All rights reserved
1616
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
17-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
17+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1818
* Copyright (c) 2015 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
2020
*
@@ -290,6 +290,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
290290
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
291291
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
292292
true, node->num_procs, app->app);
293+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
293294
rc = ORTE_ERR_SILENT;
294295
goto error;
295296
}

orte/mca/rmaps/resilient/rmaps_resilient.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Corporation. All rights reserved.
66
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
77
* All rights reserved.
8-
* Copyright (c) 2014 Intel, Inc. All rights reserved.
8+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
99
*
1010
* $COPYRIGHT$
1111
*
@@ -833,6 +833,7 @@ static int map_to_ftgrps(orte_job_t *jdata)
833833
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
834834
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
835835
true, nd->num_procs, app->app);
836+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
836837
return ORTE_ERR_SILENT;
837838
}
838839
/* flag the node as oversubscribed so that sched-yield gets

orte/mca/rmaps/round_robin/rmaps_rr_mappers.c

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
14+
* Copyright (c) 2015 Research Organization for Information Science
15+
* and Technology (RIST). All rights reserved.
1416
* $COPYRIGHT$
1517
*
1618
* Additional copyrights may follow
@@ -58,6 +60,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
5860
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
5961
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
6062
true, app->num_procs, app->app);
63+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
6164
return ORTE_ERR_SILENT;
6265
}
6366
}
@@ -184,6 +187,24 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
184187
* properly set
185188
*/
186189
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
190+
/* check for permission */
191+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
192+
/* if we weren't given a directive either way, then we will error out
193+
* as the #slots were specifically given, either by the host RM or
194+
* via hostfile/dash-host */
195+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
196+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
197+
true, app->num_procs, app->app);
198+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
199+
return ORTE_ERR_SILENT;
200+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
201+
/* if we were explicitly told not to oversubscribe, then don't */
202+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
203+
true, app->num_procs, app->app);
204+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
205+
return ORTE_ERR_SILENT;
206+
}
207+
}
187208
}
188209
/* if we have mapped everything, then we are done */
189210
if (nprocs_mapped == app->num_procs) {
@@ -219,6 +240,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
219240
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
220241
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
221242
true, app->num_procs, app->app);
243+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
222244
return ORTE_ERR_SILENT;
223245
}
224246
oversubscribed = true;
@@ -349,6 +371,24 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
349371
* properly set
350372
*/
351373
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
374+
/* check for permission */
375+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
376+
/* if we weren't given a directive either way, then we will error out
377+
* as the #slots were specifically given, either by the host RM or
378+
* via hostfile/dash-host */
379+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
380+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
381+
true, app->num_procs, app->app);
382+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
383+
return ORTE_ERR_SILENT;
384+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
385+
/* if we were explicitly told not to oversubscribe, then don't */
386+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
387+
true, app->num_procs, app->app);
388+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
389+
return ORTE_ERR_SILENT;
390+
}
391+
}
352392
}
353393
if (nprocs_mapped == app->num_procs) {
354394
/* we are done */
@@ -452,6 +492,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
452492
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
453493
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
454494
true, app->num_procs, app->app);
495+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
455496
return ORTE_ERR_SILENT;
456497
}
457498
}
@@ -550,6 +591,24 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
550591
* properly set
551592
*/
552593
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
594+
/* check for permission */
595+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
596+
/* if we weren't given a directive either way, then we will error out
597+
* as the #slots were specifically given, either by the host RM or
598+
* via hostfile/dash-host */
599+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
600+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
601+
true, app->num_procs, app->app);
602+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
603+
return ORTE_ERR_SILENT;
604+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
605+
/* if we were explicitly told not to oversubscribe, then don't */
606+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
607+
true, app->num_procs, app->app);
608+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
609+
return ORTE_ERR_SILENT;
610+
}
611+
}
553612
}
554613
if (nprocs_mapped == app->num_procs) {
555614
/* we are done */
@@ -593,6 +652,7 @@ static int byobj_span(orte_job_t *jdata,
593652
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
594653
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
595654
true, app->num_procs, app->app);
655+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
596656
return ORTE_ERR_SILENT;
597657
}
598658
}

orte/mca/rmaps/seq/rmaps_seq.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -363,13 +363,32 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
363363
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
364364
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
365365
true, node->num_procs, app->app);
366+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
366367
rc = ORTE_ERR_SILENT;
367368
goto error;
368369
}
369370
/* flag the node as oversubscribed so that sched-yield gets
370371
* properly set
371372
*/
372373
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
374+
/* check for permission */
375+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
376+
/* if we weren't given a directive either way, then we will error out
377+
* as the #slots were specifically given, either by the host RM or
378+
* via hostfile/dash-host */
379+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
380+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
381+
true, app->num_procs, app->app);
382+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
383+
return ORTE_ERR_SILENT;
384+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
385+
/* if we were explicitly told not to oversubscribe, then don't */
386+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
387+
true, app->num_procs, app->app);
388+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
389+
return ORTE_ERR_SILENT;
390+
}
391+
}
373392
}
374393
/* assign the vpid */
375394
proc->name.vpid = vpid++;

0 commit comments

Comments
 (0)