Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion easybuild/easyblocks/generic/nvidiabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
from easybuild.tools import LooseVersion
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.filetools import adjust_permissions, remove, symlink, write_file
from easybuild.tools.filetools import adjust_permissions, remove, symlink
from easybuild.tools.filetools import write_file, apply_regex_substitutions, resolve_path
from easybuild.tools.modules import MODULE_LOAD_ENV_HEADERS, get_software_root, get_software_version
from easybuild.tools.run import run_shell_cmd
from easybuild.tools.systemtools import AARCH64, X86_64, get_cpu_architecture, get_shared_lib_ext
Expand Down Expand Up @@ -447,6 +448,15 @@ def install_step(self):
'NVHPC_STDPAR_CUDACC': self.default_compute_capability[0].replace('.', ''),
})

# Before installing, make sure that NVHPC chooses the CUDA version we want.
# By default, NVHPC calls 'nvc -printcudaversion', which completely ignores the version
# we set and only considers the supported GPUs and the CUDA driver it finds.
# On a system without GPUs, this may return a CUDA version that is incompatible with
# the one we define in active_cuda_version.
desired_cuda_var_regex = [(r'DESIREDCUDA=\$(.*)', f'DESIREDCUDA={str(self.active_cuda_version)}')]
apply_regex_substitutions('./install_components/install', desired_cuda_var_regex,
on_missing_match='error')

cmd_env = ' '.join([f'{name}={value}' for name, value in sorted(nvhpc_env_vars.items())])
run_shell_cmd(f"{cmd_env} ./install")

Expand Down Expand Up @@ -543,7 +553,30 @@ def sanity_check_step(self):
os.path.join(prefix, 'comm_libs', 'mpi', 'bin', 'mpicc'),
os.path.join(prefix, 'comm_libs', 'mpi', 'bin', 'mpifort'),
])
if self.cfg['module_add_nccl']:
# Ensure that the NCCL path points to the correct CUDA version
comm_lib_path = os.path.join(self.installdir, prefix, 'comm_libs')
expected_path = resolve_path(os.path.join(comm_lib_path, str(self.active_cuda_version), 'nccl'))
actual_path = resolve_path(os.path.join(comm_lib_path, 'nccl'))
if actual_path != expected_path:
self.sanity_check_fail_msgs.append(
f"CUDA symlink for NCCL libraries does not match: {expected_path} != {actual_path}")
if self.cfg['module_add_nvshmem']:
# Ensure that the NVSHMEM path points to the correct CUDA version
comm_lib_path = os.path.join(self.installdir, prefix, 'comm_libs')
expected_path = resolve_path(os.path.join(comm_lib_path, str(self.active_cuda_version), 'nvshmem'))
actual_path = resolve_path(os.path.join(comm_lib_path, 'nvshmem'))
if actual_path != expected_path:
self.sanity_check_fail_msgs.append(
f"CUDA symlink for NVSHMEM libraries does not match: {expected_path} != {actual_path}")
if self.cfg['module_add_math_libs']:
# Ensure that the math_libs path points to the correct CUDA version
math_lib_path = os.path.join(self.installdir, prefix, 'math_libs')
expected_path = resolve_path(os.path.join(math_lib_path, str(self.active_cuda_version), 'include'))
actual_path = resolve_path(os.path.join(math_lib_path, 'include'))
if actual_path != expected_path:
self.sanity_check_fail_msgs.append(
f"CUDA symlink for math libraries does not match: {expected_path} != {actual_path}")
nvhpc_files.extend([
os.path.join(prefix, 'math_libs', 'lib64', f'libcublas.{shlib_ext}'),
os.path.join(prefix, 'math_libs', 'lib64', f'libcufftw.{shlib_ext}'),
Expand Down