Skip to content

Commit b0476b9

Browse files
committed
move NVHPC mpirun fix to parse hook
1 parent ea24c77 commit b0476b9

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

src/build_tools/hooks_hydra.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,14 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument
318318
ec['dependencies'] = [d for d in ec['dependencies'] if 'libfabric' not in d]
319319
ec.log.info("[parse hook] Removed libfabric from dependency list")
320320

321+
if ec.name == 'NVHPC':
322+
# NVHPC ships with OpenMPI v4 which has an issue between its hwloc
323+
# and Slurm cgroups2 that results in mpirun trying to use cores that are
324+
# not allocated to the job (see https://github.com/open-mpi/ompi/issues/12470)
325+
# Only mpirun is affected, workaround is to set '--bind-to=none':
326+
ec.log.info("[parse hook] Disable mpirun process binding in NVHPC")
327+
ec['modextravars'].update({'OMPI_MCA_hwloc_base_binding_policy': 'none'})
328+
321329
if ec.name == 'Gurobi':
322330
# use centrally installed Gurobi license file, and don't copy to installdir
323331
ec['license_file'] = '/apps/brussel/licenses/gurobi/gurobi.lic'
@@ -435,7 +443,10 @@ def pre_configure_hook(self, *args, **kwargs): # pylint: disable=unused-argumen
435443

436444

437445
def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument
438-
"""Hook at pre-module level to alter module files"""
446+
"""
447+
Hook at pre-module level to alter module files
448+
WARNING: this hook triggers *after* sanity checks
449+
"""
439450

440451
# Must be done this way, updating self.cfg['modextravars']
441452
# directly doesn't work due to templating.
@@ -498,12 +509,6 @@ def pre_module_hook(self, *args, **kwargs): # pylint: disable=unused-argument
498509
slurm_mpi_type = 'pmix'
499510
self.log.info("[pre-module hook] Set Slurm MPI type to: %s", slurm_mpi_type)
500511
self.cfg['modextravars'].update({'SLURM_MPI_TYPE': slurm_mpi_type})
501-
# NVHPC ships with OpenMPI v4 which has an issue between its hwloc
502-
# and Slurm cgroups2 that results in mpirun trying to use cores that are
503-
# not allocated to the job (see https://github.com/open-mpi/ompi/issues/12470)
504-
# Only mpirun is affected, workaround is to set '--bind-to=none':
505-
self.log.info("[pre-module hook] Disable mpirun process binding in NVHPC")
506-
self.cfg['modextravars'].update({'OMPI_MCA_hwloc_base_binding_policy': 'none'})
507512

508513
##########################
509514
# ------ TUNING -------- #

0 commit comments

Comments
 (0)