Add some changes to future-proof the unified memory build

ntselepidis · ntselepidis · commit 5e369c364003 · 2025-08-11T00:41:13.000-07:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -527,7 +527,12 @@ function(MFC_SETUP_TARGET)
 
         if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
             find_package(CUDAToolkit REQUIRED)
-            target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
+            if (TARGET CUDA::nvToolsExt) # CUDA <= 12.8
+                target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
+            else() # CUDA >= 12.9
+                target_link_libraries(${a_target} PRIVATE nvhpcwrapnvtx)
+                target_link_options(${a_target} PRIVATE "-cudalib=nvtx3")
+            endif()
         endif()
     endforeach()
 
diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp
@@ -20,7 +20,17 @@
 #ifdef MFC_SIMULATION
 #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
     block
+! Beginning in the 25.3 release, the structure of the cudafor module has been changed slightly.
+! The module now includes, or “uses” 3 submodules: cuda_runtime_api, gpu_reductions, and sort.
+! The cudafor functionality has not changed. But for new users, or users who have needed to
+! work-around name conflicts in the module, it may be better to use cuda_runtime_api to expose
+! interfaces to the CUDA runtime calls described in Chapter 4 of this guide.
+! https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#fortran-host-modules
+#if __NVCOMPILER_MAJOR__ < 25 || (__NVCOMPILER_MAJOR__ == 25 && __NVCOMPILER_MINOR__ < 3)
     use cudafor, gpu_sum => sum, gpu_maxval => maxval, gpu_minval => minval
+#else
+    use cuda_runtime_api
+#endif
     integer :: istat
 
     if (nv_uvm_pref_gpu) then
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
@@ -98,7 +98,9 @@ module m_weno
     !> @name Indical bounds in the s1-, s2- and s3-directions
     !> @{
     type(int_bounds_info) :: is1_weno, is2_weno, is3_weno
+#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
     $:GPU_DECLARE(create='[is1_weno,is2_weno,is3_weno]')
+#endif
     !
     !> @}
 
diff --git a/toolchain/templates/santis.mako b/toolchain/templates/santis.mako
@@ -26,10 +26,23 @@
 % endif
 % endif
 
-# NVHPC and CUDA env vars
-export NV_ACC_USE_MALLOC=0                    # use cudaMallocManaged instead of malloc ( compiled using -gpu=mem:unified:managedalloc )
-export NVCOMPILER_ACC_NO_MEMHINTS=1           # disable implicit compiler hints
-#export CUDA_BUFFER_PAGE_IN_THRESHOLD_MS=0.001 # workaround for copying to/from unpopulated buffers on GH
+# We compiled the code using -gpu=mem:unified:managedalloc, hence we use cudaMallocManaged for the dynamic allocations.
+# Using NV_ACC_USE_MALLOC we could switch to malloc at runtime. We choose not to do that here and stick with cudaMallocManaged and 2MB page sizes.
+# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#memory-model
+# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#command-line-options-selecting-compiler-memory-modes
+export NV_ACC_USE_MALLOC=0
+
+# For NVIDIA CUDA devices, controls the use of automatic memory hints at data constructs in the managed and unified memory modes.
+# Below is a breakdown of the permitted values (case insensitive):
+# - DEFAULT: Use the default settings. On NVIDIA Grace Hopper systems, the default is currently ENABLE_ALL; on all other systems, the default is DISABLE.
+# - DISABLE: Memory hints are disabled for all data constructs.
+# - ENABLE_EXPLICIT: Memory hints are enabled for explicit data constructs only.
+# - ENABLE_ALL: Memory hints are enabled for explicit and implicit data constructs.
+# https://docs.nvidia.com/hpc-sdk/compilers/hpc-compilers-user-guide/index.html#environment-variables-controlling-device-memory-management
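+# Here we disable the compiler's automatic memory hints. Note that, per the list above, DISABLE turns hints off for all data constructs (explicit as well as implicit).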
+# Using NVCOMPILER_ACC_NO_MEMHINTS is the legacy way and is still supported, but users should prefer NVCOMPILER_ACC_MEMHINTS when using newer nvhpc compilers.
+export NVCOMPILER_ACC_NO_MEMHINTS=1           # disable implicit compiler hints - legacy way
+export NVCOMPILER_ACC_MEMHINTS=DISABLE        # disable implicit compiler hints - new way
 
 # Cray MPICH
 export MPICH_GPU_SUPPORT_ENABLED=1

Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,9 @@ module m_weno`
`98`	`98`	`!> @name Indical bounds in the s1-, s2- and s3-directions`
`99`	`99`	`!> @{`
`100`	`100`	`type(int_bounds_info) :: is1_weno, is2_weno, is3_weno`
	`101`	`+#ifndef __NVCOMPILER_GPU_UNIFIED_MEM`
`101`	`102`	`$:GPU_DECLARE(create='[is1_weno,is2_weno,is3_weno]')`
	`103`	`+#endif`
`102`	`104`	`!`
`103`	`105`	`!> @}`
`104`	`106`