Commit a143800

Authored and committed by Ralph Castain
Enable full operations under SLURM on Cray systems by co-locating a daemon with mpirun when mpirun is executing on a compute node in that environment. This allows local application procs to inherit their security credential from the daemon, as it will have been launched via SLURM.
Signed-off-by: Ralph Castain <[email protected]>
1 parent 88948f7 commit a143800

5 files changed: +43 / -46 lines

config/orte_check_slurm.m4

Lines changed: 10 additions & 0 deletions
@@ -13,6 +13,7 @@
 # Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
 # Copyright (c) 2016 Los Alamos National Security, LLC. All rights
 # reserved.
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@@ -68,6 +69,15 @@ AC_DEFUN([ORTE_CHECK_SLURM],[
 [orte_check_slurm_happy="yes"],
 [orte_check_slurm_happy="no"])])

+# check to see if this is a Cray nativized slurm env.
+
+slurm_cray_env=0
+OPAL_CHECK_ALPS([orte_slurm_cray],
+                [slurm_cray_env=1])
+
+AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env],
+                   [defined to 1 if slurm cray env, 0 otherwise])
+
 OPAL_SUMMARY_ADD([[Resource Managers]],[[Slurm]],[$1],[$orte_check_slurm_happy])
 fi

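Note: the AC_DEFINE_UNQUOTED call added above emits SLURM_CRAY_ENV as 0 or 1 into the generated configuration header, so C code elsewhere in the tree can branch on it at compile time (the ras/base change further down does exactly that). As a minimal illustration of the pattern only, here is a hypothetical, self-contained C sketch; the program and its fallback #define are not part of this commit.

/* illustrative_gate.c - hypothetical example, not part of this commit.
 * Shows how a configure-time result such as SLURM_CRAY_ENV (defined to
 * 0 or 1 by AC_DEFINE_UNQUOTED above) gates a code path at compile time.
 * In the real tree the value comes from the generated config header;
 * the fallback below only keeps this sketch self-contained. */
#include <stdio.h>

#ifndef SLURM_CRAY_ENV
#define SLURM_CRAY_ENV 0   /* stand-in; configure normally supplies this */
#endif

int main(void)
{
#if SLURM_CRAY_ENV
    puts("Cray-nativized SLURM: co-locate an orted with mpirun for local procs");
#else
    puts("Regular SLURM: mpirun may fork local procs itself");
#endif
    return 0;
}

Compile with -DSLURM_CRAY_ENV=1 to exercise the Cray branch.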
orte/mca/plm/slurm/configure.m4

Lines changed: 1 addition & 8 deletions
@@ -13,6 +13,7 @@
 # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
 # Copyright (c) 2011-2016 Los Alamos National Security, LLC.
 # All rights reserved.
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@@ -38,12 +39,4 @@ AC_DEFUN([MCA_orte_plm_slurm_CONFIG],[
 AC_SUBST([plm_slurm_LDFLAGS])
 AC_SUBST([plm_slurm_LIBS])

-# check to see if this is a Cray nativized slurm env.
-
-slurm_cray_env=0
-OPAL_CHECK_ALPS([plm_slurm_cray],
-                [slurm_cray_env=1])
-
-AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env],
-                   [defined to 1 if slurm cray env, 0 otherwise])
 ])dnl

orte/mca/plm/slurm/help-plm-slurm.txt

Lines changed: 0 additions & 15 deletions
@@ -49,18 +49,3 @@ are running.

 Please consult with your system administrator about obtaining
 such support.
-[no-local-support]
-The SLURM process starter cannot start processes local to
-mpirun when executing under a Cray environment. The problem
-is that mpirun is not itself a child of a slurmd daemon. Thus,
-any processes mpirun itself starts will inherit incorrect
-RDMA credentials.
-
-Your application will be mapped and run (assuming adequate
-resources) on the remaining allocated nodes. If adequate
-resources are not available, you will need to exit and obtain
-a larger allocation.
-
-This situation will be fixed in a future release. Meantime,
-you can turn "off" this warning by setting the plm_slurm_warning
-MCA param to 0.

orte/mca/plm/slurm/plm_slurm_module.c

Lines changed: 0 additions & 19 deletions
@@ -193,25 +193,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
                          "%s plm:slurm: LAUNCH DAEMONS CALLED",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

-#if SLURM_CRAY_ENV
-    /* if we are in a Cray-SLURM environment, then we cannot
-     * launch procs local to the HNP. The problem
-     * is the MPI processes launched on the head node (where the
-     * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
-     * (mpirun) which is not a child of a slurmd daemon. This
-     * means that any RDMA credentials obtained via the odls/alps
-     * local launcher are incorrect. So warn the user and set
-     * the envar for no_schedule_local if mpirun is not on a
-     * system management node (i.e. is part of the allocation)
-     * and the "no_use_local" flag hasn't been set */
-    if (mca_plm_slurm_component.slurm_warning_msg &&
-        (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) {
-        orte_show_help("help-plm-slurm.txt", "no-local-support", true);
-        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
-        mca_plm_slurm_component.slurm_warning_msg = false; // only do this once
-    }
-#endif
-
     /* if we are launching debugger daemons, then just go
      * do it - no new daemons will be launched
      */

orte/mca/ras/base/ras_base_node.c

Lines changed: 32 additions & 4 deletions
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
  * reserved.
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -30,6 +30,7 @@
 #include "opal/util/if.h"

 #include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rmaps/base/base.h"
 #include "orte/util/name_fns.h"
 #include "orte/runtime/orte_globals.h"

@@ -46,7 +47,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     int rc, i;
     orte_node_t *node, *hnp_node, *nptr;
     char *ptr;
-    bool hnp_alone = true;
+    bool hnp_alone = true, skiphnp = false;
     orte_attribute_t *kv;
     char **alias=NULL, **nalias;

@@ -77,6 +78,33 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)

     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
+#if SLURM_CRAY_ENV
+    /* if we are in a Cray-SLURM environment, then we cannot
+     * launch procs local to the HNP. The problem
+     * is the MPI processes launched on the head node (where the
+     * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
+     * (mpirun) which is not a child of a slurmd daemon. This
+     * means that any RDMA credentials obtained via the odls/alps
+     * local launcher are incorrect. Test for this condition. If
+     * found, then take steps to ensure we launch a daemon on
+     * the same node as mpirun and that it gets used to fork
+     * local procs instead of mpirun so they get the proper
+     * credential */
+    if (NULL != hnp_node) {
+        OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
+            if (orte_ifislocal(node->name)) {
+                orte_hnp_is_allocated = true;
+                break;
+            }
+        }
+        if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
+            hnp_node->name = strdup("mpirun");
+            skiphnp = true;
+            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
+        }
+    }
+#endif
+

     /* cycle through the list */
     while (NULL != (item = opal_list_remove_first(nodes))) {
@@ -86,7 +114,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
          * first position since it is the first one entered. We need to check to see
          * if this node is the same as the HNP's node so we don't double-enter it
          */
-        if (NULL != hnp_node && orte_ifislocal(node->name)) {
+        if (!skiphnp && NULL != hnp_node && orte_ifislocal(node->name)) {
             OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                  "%s ras:base:node_insert updating HNP [%s] info to %ld slots",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -189,7 +217,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
      * ensure we don't have any domain info in the node record
      * for the hnp
      */
-    if (!orte_have_fqdn_allocation && !hnp_alone) {
+    if (NULL != hnp_node && !orte_have_fqdn_allocation && !hnp_alone) {
         if (NULL != (ptr = strchr(hnp_node->name, '.'))) {
             *ptr = '\0';
         }

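To summarize the runtime decision introduced in orte_ras_base_node_insert() above, the sketch below restates it as a standalone program. The struct and helper names (node, is_local, colocate_daemon_with_mpirun) are simplified stand-ins for the ORTE types and calls (orte_node_t, orte_ifislocal, the mapping directives), not the real API. When the check succeeds, the patch relabels the HNP entry as "mpirun" and sets skiphnp, so a SLURM-launched daemon is scheduled on mpirun's node and forks the local procs, which then inherit the proper credential.

/* colocate_sketch.c - hypothetical paraphrase of the check above,
 * not part of this commit. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct node {                     /* stand-in for orte_node_t */
    const char *name;
};

/* stand-in for orte_ifislocal(): is this allocation entry the host
 * that mpirun itself is running on? */
static bool is_local(const struct node *n, const char *mpirun_host)
{
    return 0 == strcmp(n->name, mpirun_host);
}

/* Returns true when a daemon should be co-located with mpirun:
 * mpirun sits on a node inside the allocation and the user has not
 * forbidden use of the local node (ORTE_MAPPING_NO_USE_LOCAL). */
static bool colocate_daemon_with_mpirun(const struct node *alloc, size_t n,
                                        const char *mpirun_host,
                                        bool no_use_local)
{
    for (size_t i = 0; i < n; ++i) {
        if (is_local(&alloc[i], mpirun_host)) {
            return !no_use_local;
        }
    }
    return false;   /* mpirun is outside the allocation; nothing to do */
}

int main(void)
{
    struct node alloc[] = { { "nid00012" }, { "nid00013" } };
    bool colocate = colocate_daemon_with_mpirun(alloc, 2, "nid00012", false);
    printf("co-locate a daemon with mpirun: %s\n", colocate ? "yes" : "no");
    return 0;
}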