Skip to content

Commit 5e369c3

Browse files
committed
Add some changes to future proof the unified memory build
1 parent 8fef22d commit 5e369c3

File tree

4 files changed

+35
-5
lines changed

4 files changed

+35
-5
lines changed

CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,12 @@ function(MFC_SETUP_TARGET)
527527

528528
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
529529
find_package(CUDAToolkit REQUIRED)
530-
target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
530+
if (TARGET CUDA::nvToolsExt) # CUDA <= 12.8
531+
target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
532+
else() # CUDA >= 12.9
533+
target_link_libraries(${a_target} PRIVATE nvhpcwrapnvtx)
534+
target_link_options(${a_target} PRIVATE "-cudalib=nvtx3")
535+
endif()
531536
endif()
532537
endforeach()
533538

src/common/include/macros.fpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,17 @@
2020
#ifdef MFC_SIMULATION
2121
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
2222
block
23+
! Beginning in the NVHPC 25.3 release, the structure of the cudafor module has been changed slightly.
24+
! The module now includes, or “uses” 3 submodules: cuda_runtime_api, gpu_reductions, and sort.
25+
! The cudafor functionality has not changed. But for new users, or users who have needed to
26+
! work-around name conflicts in the module, it may be better to use cuda_runtime_api to expose
27+
! interfaces to the CUDA runtime calls described in Chapter 4 of the CUDA Fortran Programming Guide.
28+
! https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#fortran-host-modules
29+
#if __NVCOMPILER_MAJOR__ < 25 || (__NVCOMPILER_MAJOR__ == 25 && __NVCOMPILER_MINOR__ < 3)
2330
use cudafor, gpu_sum => sum, gpu_maxval => maxval, gpu_minval => minval
31+
#else
32+
use cuda_runtime_api
33+
#endif
2434
integer :: istat
2535
2636
if (nv_uvm_pref_gpu) then

src/simulation/m_weno.fpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ module m_weno
9898
!> @name Indical bounds in the s1-, s2- and s3-directions
9999
!> @{
100100
type(int_bounds_info) :: is1_weno, is2_weno, is3_weno
101+
#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
101102
$:GPU_DECLARE(create='[is1_weno,is2_weno,is3_weno]')
103+
#endif
102104
!
103105
!> @}
104106

toolchain/templates/santis.mako

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,23 @@
2626
% endif
2727
% endif
2828

29-
# NVHPC and CUDA env vars
30-
export NV_ACC_USE_MALLOC=0 # use cudaMallocManaged instead of malloc ( compiled using -gpu=mem:unified:managedalloc )
31-
export NVCOMPILER_ACC_NO_MEMHINTS=1 # disable implicit compiler hints
32-
#export CUDA_BUFFER_PAGE_IN_THRESHOLD_MS=0.001 # workaround for copying to/from unpopulated buffers on GH
29+
# We compiled the code using -gpu=unified:managedalloc, hence we use cudaMallocManaged for the dynamic allocations.
30+
# Using NV_ACC_USE_MALLOC we could switch to malloc at runtime. We choose not to do that here and stick with cudaMallocManaged and 2MB page sizes.
31+
# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#memory-model
32+
# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#command-line-options-selecting-compiler-memory-modes
33+
export NV_ACC_USE_MALLOC=0
34+
35+
# For NVIDIA CUDA devices, controls the use of automatic memory hints at data constructs in the managed and unified memory modes.
36+
# Below is a breakdown of the permitted values (case insensitive):
37+
# - DEFAULT: Use the default settings. On NVIDIA Grace Hopper systems, the default is currently ENABLE_ALL; on all other systems, the default is DISABLE.
38+
# - DISABLE: Memory hints are disabled for all data constructs.
39+
# - ENABLE_EXPLICIT: Memory hints are enabled for explicit data constructs only.
40+
# - ENABLE_ALL: Memory hints are enabled for explicit and implicit data constructs.
41+
# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#environment-variables-controlling-device-memory-management
42+
# Here we disable the memory hints; per the list above, DISABLE turns them off for all data constructs (explicit and implicit).
43+
# Using NVCOMPILER_ACC_NO_MEMHINTS is the legacy way and is still supported, but users should prefer NVCOMPILER_ACC_MEMHINTS when using newer nvhpc compilers.
44+
export NVCOMPILER_ACC_NO_MEMHINTS=1 # disable implicit compiler hints - legacy way
45+
export NVCOMPILER_ACC_MEMHINTS=DISABLE # disable implicit compiler hints - new way
3346

3447
# Cray MPICH
3548
export MPICH_GPU_SUPPORT_ENABLED=1

0 commit comments

Comments
 (0)