Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions toolchain/modules
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,12 @@ n-gpu CC=nvc CXX=nvc++ FC=nvfortran
san CSCS Santis
san-all cmake python
san-gpu nvhpc cuda cray-mpich

# HiPerGator. "h" is the system slug; the hipergator.mako job template loads
# this entry with "./mfc.sh load -c h".
# Presumably "h-all" lines apply to every build and "h-gpu" lines to GPU builds
# only, with bare words being module names and NAME=value pairs being
# environment variables -- confirm against this file's syntax notes.
h hipergator
# NOTE(review): the GPU build loads nvhpc/25.9, while the OpenMPI paths below
# point at an installation under nvhpc/25.3 -- confirm this pairing is intended.
h-gpu nvhpc/25.9
h-gpu CUDA_HOME="/apps/compilers/cuda/12.8.1"
h-all HPC_OMPI_DIR="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7"
h-all HPC_OMPI_BIN="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin"
h-all OMPI_MCA_pml=ob1 OMPI_MCA_coll_hcoll_enable=0
# Prepends the same OpenMPI bin directory as HPC_OMPI_BIN to PATH (GPU entry only).
h-gpu PATH="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin:${PATH}"
Comment on lines +93 to +98
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Change the NVHPC compiler version from nvhpc/25.9 to nvhpc/25.3 to match the version used for the OpenMPI libraries, preventing potential ABI compatibility issues. [possible issue, importance: 9]

Suggested change
h-gpu nvhpc/25.9
h-gpu CUDA_HOME="/apps/compilers/cuda/12.8.1"
h-all HPC_OMPI_DIR="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7"
h-all HPC_OMPI_BIN="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin"
h-all OMPI_MCA_pml=ob1 OMPI_MCA_coll_hcoll_enable=0
h-gpu PATH="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin:${PATH}"
h-gpu nvhpc/25.3
h-gpu CUDA_HOME="/apps/compilers/cuda/12.8.1"
h-all HPC_OMPI_DIR="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7"
h-all HPC_OMPI_BIN="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin"
h-all OMPI_MCA_pml=ob1 OMPI_MCA_coll_hcoll_enable=0
h-gpu PATH="/apps/mpi/cuda/12.8.1/nvhpc/25.3/openmpi/5.0.7/bin:${PATH}"

# NOTE(review): MFC_CUDA_CC=100 presumably selects CUDA compute capability 10.0
# for the GPU build -- confirm. NVHPC_CUDA_HOME repeats the CUDA 12.8.1 path
# that CUDA_HOME is set to for this system.
h-gpu MFC_CUDA_CC=100 NVHPC_CUDA_HOME="/apps/compilers/cuda/12.8.1"
61 changes: 61 additions & 0 deletions toolchain/templates/hipergator.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bash
## Mako template for the HiPerGator job script. Lines starting with "##" are
## Mako comments and lines starting with "%" are Mako control lines; neither
## is written to the rendered script.

## Imports helpers.mako (not shown here) under the "helpers" namespace.
<%namespace name="helpers" file="helpers.mako"/>

## Scheduler directives are emitted only when the engine is 'batch'.
% if engine == 'batch':
#SBATCH --nodes=${nodes}
#SBATCH --ntasks-per-node=${tasks_per_node}
#SBATCH --job-name="${name}"
#SBATCH --output="${name}.out"
#SBATCH --time=${walltime}
## CPUs per task is fixed at 7; it does not depend on any template input.
#SBATCH --cpus-per-task=7
## GPU runs request one GPU per task with the "closest" GPU binding.
% if gpu:
#SBATCH --gpus-per-task=1
#SBATCH --gpu-bind=closest
% endif
% if account:
#SBATCH --account=${account}
% endif
## Falls back to the hpg-b200 partition when no partition is supplied.
% if partition:
#SBATCH --partition=${partition}
% else:
#SBATCH --partition=hpg-b200
% endif
% if quality_of_service:
#SBATCH --qos=${quality_of_service}
% endif
## Mail notifications are requested only when an address is supplied.
% if email:
#SBATCH --mail-user=${email}
#SBATCH --mail-type="BEGIN, END, FAIL"
% endif
% endif

## Shared prologue rendered from helpers.mako (definition not shown here).
${helpers.template_prologue()}

## "ok" is not defined in this template -- presumably a logging helper provided
## by the prologue; confirm in helpers.mako.
ok ":) Loading modules:\n"
## ${MFC_ROOT_DIR} is a Mako expression, so it is substituted when the template
## is rendered rather than expanded by the shell.
cd "${MFC_ROOT_DIR}"
## Only batch jobs load modules: system slug "h", mode "g" for GPU runs and
## "c" otherwise.
% if engine == 'batch':
. ./mfc.sh load -c h -m ${'g' if gpu else 'c'}
% endif
## Return to the previous directory; "cd -" prints that directory, so its
## output is discarded.
cd - > /dev/null
echo


## One run section is rendered for each entry in "targets".
% for target in targets:
${helpers.run_prologue(target)}

## Without MPI the target binary is launched directly (prefixed by the
## profiler command); with MPI it is launched through mpirun using
## nodes*tasks_per_node ranks and "--bind-to none". "set -x" inside the
## subshell traces the command as it runs.
## NOTE(review): Mako consumes a trailing backslash together with its newline,
## so the continued mpirun lines presumably reach the rendered script as one
## line -- confirm against a rendered script.
% if not mpi:
(set -x; ${profiler} "${target.get_install_binpath(case)}")
% else:
(set -x; ${profiler} \
mpirun -np ${nodes*tasks_per_node} \
--bind-to none \
"${target.get_install_binpath(case)}")
Comment on lines +50 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: To improve performance on multi-GPU nodes, change the mpirun binding from --bind-to none to --bind-to socket to ensure better process-to-GPU affinity. [general, importance: 7]

Suggested change
(set -x; ${profiler} \
mpirun -np ${nodes*tasks_per_node} \
--bind-to none \
"${target.get_install_binpath(case)}")
(set -x; ${profiler} \
mpirun -np ${nodes*tasks_per_node} \
--bind-to socket \
--report-bindings \
"${target.get_install_binpath(case)}")

## Closes the MPI / non-MPI launch branch opened earlier in the loop body.
% endif

## Per-target epilogue rendered from helpers.mako (definition not shown here).
${helpers.run_epilogue(target)}

echo
## Closes the per-target loop.
% endfor

## Shared epilogue rendered from helpers.mako (definition not shown here).
${helpers.template_epilogue()}
Loading