Skip to content

Commit 7f0ed02

Browse files
authored
Merge pull request #3387 from rhc54/cmr2x/slurm
Enable slurm operations on Cray with constraints
2 parents f2cfc3e + 78b59a5 commit 7f0ed02

File tree

8 files changed

+58
-28
lines changed

8 files changed

+58
-28
lines changed

orte/mca/plm/alps/help-plm-alps.txt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13+
# Copyright (c) 2017 Intel, Inc. All rights reserved.
1314
# $COPYRIGHT$
1415
#
1516
# Additional copyrights may follow
@@ -39,7 +40,3 @@ the map for this application. This can be caused by a lack of
3940
an allocation, or by an error in the Open MPI code. Please check
4041
to ensure you have an ALPS allocation. If you do, then please pass
4142
the error to the Open MPI user's mailing list for assistance.
42-
#
43-
[slurm-not-supported]
44-
mpirun is not a supported launcher on Cray XC using Native SLURM.
45-
srun must be used to launch jobs on these systems.

orte/mca/plm/alps/plm_alps.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1213
* $COPYRIGHT$
1314
*
1415
* Additional copyrights may follow
@@ -46,7 +47,6 @@ ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t
4647
mca_plm_alps_component;
4748
ORTE_DECLSPEC extern orte_plm_base_module_t
4849
orte_plm_alps_module;
49-
extern bool mca_plm_alps_using_aprun;
5050

5151
END_C_DECLS
5252

orte/mca/plm/alps/plm_alps_component.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
15+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -43,7 +44,6 @@
4344
*/
4445
const char *mca_plm_alps_component_version_string =
4546
"Open MPI alps plm MCA component version " ORTE_VERSION;
46-
bool mca_plm_alps_using_aprun = {true};
4747

4848

4949
/*
@@ -158,7 +158,11 @@ static int orte_plm_alps_component_query(mca_base_module_t **module, int *priori
158158
}
159159

160160
if((NULL != wlm_detected) && !strcmp(slurm, wlm_detected)) {
161-
mca_plm_alps_using_aprun = false;
161+
/* we are in a Cray SLURM environment, so we don't want
162+
* this plm component */
163+
*priority = 0;
164+
*module = NULL;
165+
return ORTE_ERROR;
162166
}
163167
#endif
164168

orte/mca/plm/alps/plm_alps_module.c

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
1414
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
16+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1717
* $COPYRIGHT$
1818
*
1919
* Additional copyrights may follow
@@ -121,23 +121,6 @@ static int plm_alps_init(void)
121121
return rc;
122122
}
123123

124-
/*
125-
* owing to way the SLURM PLM component works, we can't use
126-
* it on Cray XC systems as currently designed. The problem
127-
* is the MPI processes launched on the head node (where the
128-
* ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
129-
* (mpirun) which is not a child of a slurmd daemon. This
130-
* means that any RDMA credentials obtained via the odls/alps
131-
* local launcher are incorrect.
132-
*
133-
* So for now, we just don't support mpirun launched jobs
134-
* on Cray XC systems using Native SLURM.
135-
*/
136-
if (false == mca_plm_alps_using_aprun) {
137-
orte_show_help("help-plm-alps.txt", "slurm-not-supported", true);
138-
exit(-1);
139-
}
140-
141124
if (orte_do_not_launch) {
142125
/* must map daemons since we won't be launching them */
143126
orte_plm_globals.daemon_nodes_assigned_at_launch = true;

orte/mca/plm/slurm/help-plm-slurm.txt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# University of Stuttgart. All rights reserved.
1111
# Copyright (c) 2004-2005 The Regents of the University of California.
1212
# All rights reserved.
13-
# Copyright (c) 2014 Intel, Inc. All rights reserved.
13+
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1414
# $COPYRIGHT$
1515
#
1616
# Additional copyrights may follow
@@ -49,3 +49,18 @@ are running.
4949

5050
Please consult with your system administrator about obtaining
5151
such support.
52+
[no-local-support]
53+
The SLURM process starter cannot start processes local to
54+
mpirun when executing under a Cray environment. The problem
55+
is that mpirun is not itself a child of a slurmd daemon. Thus,
56+
any processes mpirun itself starts will inherit incorrect
57+
RDMA credentials.
58+
59+
Your application will be mapped and run (assuming adequate
60+
resources) on the remaining allocated nodes. If adequate
61+
resources are not available, you will need to exit and obtain
62+
a larger allocation.
63+
64+
This situation will be fixed in a future release. In the meantime,
65+
you can turn "off" this warning by setting the plm_slurm_warning
66+
MCA param to 0.

orte/mca/plm/slurm/plm_slurm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1213
* $COPYRIGHT$
1314
*
1415
* Additional copyrights may follow
@@ -29,6 +30,7 @@ BEGIN_C_DECLS
2930
struct orte_plm_slurm_component_t {
3031
orte_plm_base_component_t super;
3132
char *custom_args;
33+
bool slurm_warning_msg;
3234
};
3335
typedef struct orte_plm_slurm_component_t orte_plm_slurm_component_t;
3436

orte/mca/plm/slurm/plm_slurm_component.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
15+
* Copyright (c) 2017 Intel, Inc. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -28,7 +29,9 @@
2829
#include "orte_config.h"
2930
#include "orte/constants.h"
3031

32+
#include "opal/util/opal_environ.h"
3133
#include "orte/util/name_fns.h"
34+
#include "orte/util/show_help.h"
3235
#include "orte/runtime/orte_globals.h"
3336

3437
#include "orte/mca/plm/plm.h"
@@ -99,6 +102,13 @@ static int plm_slurm_register(void)
99102
MCA_BASE_VAR_SCOPE_READONLY,
100103
&mca_plm_slurm_component.custom_args);
101104

105+
mca_plm_slurm_component.slurm_warning_msg = true;
106+
(void) mca_base_component_var_register (comp, "warning", "Turn off warning message",
107+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
108+
OPAL_INFO_LVL_9,
109+
MCA_BASE_VAR_SCOPE_READONLY,
110+
&mca_plm_slurm_component.slurm_warning_msg);
111+
102112
return ORTE_SUCCESS;
103113
}
104114

orte/mca/plm/slurm/plm_slurm_module.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2014 Intel Corporation. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -65,7 +65,7 @@
6565
#include "orte/runtime/orte_wait.h"
6666
#include "orte/runtime/orte_quit.h"
6767
#include "orte/mca/errmgr/errmgr.h"
68-
#include "orte/mca/rmaps/rmaps.h"
68+
#include "orte/mca/rmaps/base/base.h"
6969
#include "orte/mca/state/state.h"
7070

7171
#include "orte/orted/orted.h"
@@ -193,6 +193,25 @@ static void launch_daemons(int fd, short args, void *cbdata)
193193
"%s plm:slurm: LAUNCH DAEMONS CALLED",
194194
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
195195

196+
#if SLURM_CRAY_ENV
197+
/* if we are in a Cray-SLURM environment, then we cannot
198+
* launch procs local to the HNP. The problem
199+
* is the MPI processes launched on the head node (where the
200+
* ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon
201+
* (mpirun) which is not a child of a slurmd daemon. This
202+
* means that any RDMA credentials obtained via the odls/alps
203+
* local launcher are incorrect. So warn the user and set
204+
* the NO_USE_LOCAL mapping directive if mpirun is not on a
205+
* system management node (i.e. is part of the allocation)
206+
* and the "no_use_local" flag hasn't been set */
207+
if (mca_plm_slurm_component.slurm_warning_msg &&
208+
(orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) {
209+
orte_show_help("help-plm-slurm.txt", "no-local-support", true);
210+
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
211+
mca_plm_slurm_component.slurm_warning_msg = false; // only do this once
212+
}
213+
#endif
214+
196215
/* if we are launching debugger daemons, then just go
197216
* do it - no new daemons will be launched
198217
*/

0 commit comments

Comments
 (0)