|
12 | 12 | * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. |
13 | 13 | * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights |
14 | 14 | * reserved. |
15 | | - * Copyright (c) 2014 Intel Corporation. All rights reserved. |
| 15 | + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. |
16 | 16 | * $COPYRIGHT$ |
17 | 17 | * |
18 | 18 | * Additional copyrights may follow |
|
65 | 65 | #include "orte/runtime/orte_wait.h" |
66 | 66 | #include "orte/runtime/orte_quit.h" |
67 | 67 | #include "orte/mca/errmgr/errmgr.h" |
68 | | -#include "orte/mca/rmaps/rmaps.h" |
| 68 | +#include "orte/mca/rmaps/base/base.h" |
69 | 69 | #include "orte/mca/state/state.h" |
70 | 70 |
|
71 | 71 | #include "orte/orted/orted.h" |
@@ -193,6 +193,25 @@ static void launch_daemons(int fd, short args, void *cbdata) |
193 | 193 | "%s plm:slurm: LAUNCH DAEMONS CALLED", |
194 | 194 | ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); |
195 | 195 |
|
| 196 | +#if SLURM_CRAY_ENV |
| 197 | + /* if we are in a Cray-SLURM environment, then we cannot |
| 198 | + * launch procs local to the HNP. The problem |
| 199 | + * is the MPI processes launched on the head node (where the |
| 200 | + * ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon |
| 201 | + * (mpirun) which is not a child of a slurmd daemon. This |
| 202 | + * means that any RDMA credentials obtained via the odls/alps |
| 203 | + * local launcher are incorrect. So warn the user and set the |
| 204 | + * ORTE_MAPPING_NO_USE_LOCAL directive if mpirun is not on a |
| 205 | + * system management node (i.e. is part of the allocation) |
| 206 | + * and the "no_use_local" flag hasn't been set */ |
| 207 | + if (mca_plm_slurm_component.slurm_warning_msg && |
| 208 | + (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) { |
| 209 | + orte_show_help("help-plm-slurm.txt", "no-local-support", true); |
| 210 | + ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL); |
| 211 | + mca_plm_slurm_component.slurm_warning_msg = false; // only do this once |
| 212 | + } |
| 213 | +#endif |
| 214 | + |
196 | 215 | /* if we are launching debugger daemons, then just go |
197 | 216 | * do it - no new daemons will be launched |
198 | 217 | */ |
|
0 commit comments