Skip to content

Commit cfb792c

Browse files
author
Benjamin Wilfong
committed
ENV Vars to case file options and code structure changes
1 parent 4065c02 commit cfb792c

File tree

12 files changed

+215
-198
lines changed

12 files changed

+215
-198
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
234234
message(STATUS "LTO/IPO is not supported in NVHPC Version < 23.11. Use a newer version of NVHPC for best performance.")
235235
else()
236236
message(STATUS "Performing IPO using -Mextract followed by -Minline")
237-
set(NVHPC_USE_TWO_PASS_IPO TRUE)
237+
set(NVHPC_USE_TWO_PASS_IPO FALSE)
238238
endif()
239239
else()
240240
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)

examples/3D_IGR_TaylorGreenVortex_nvidia/case.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@
4747
"cyl_coord": "F",
4848
"dt": dt,
4949
"t_step_start": 0,
50-
"t_step_stop": Nt,
51-
"t_step_save": int(Nt / 100),
50+
"t_step_stop": 10, #Nt,
51+
"t_step_save": 10, #int(Nt / 100),
5252
# Simulation Algorithm Parameters
5353
"num_patches": 1,
5454
"model_eqns": 2,
@@ -96,6 +96,9 @@
9696
"fluid_pp(1)%gamma": 1.0e00 / (1.4 - 1),
9797
"fluid_pp(1)%pi_inf": 0,
9898
"fluid_pp(1)%Re(1)": 1 / mu,
99+
# NVIDIA UVM Options
100+
"nv_uvm_igr_temps_on_gpu": 3,
101+
"nv_uvm_pref_gpu": "T",
99102
}
100103
)
101104
)

src/common/include/macros.fpp

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,8 @@
1818
block
1919
use cudafor, gpu_sum => sum, gpu_maxval => maxval, gpu_minval => minval
2020
integer :: istat
21-
integer :: prefer_gpu_mode
22-
character(len=10) :: prefer_gpu_mode_str
23-
24-
! environment variable
25-
call get_environment_variable("NVIDIA_MANUAL_GPU_HINTS", prefer_gpu_mode_str)
26-
if (trim(prefer_gpu_mode_str) == "0") then ! OFF
27-
prefer_gpu_mode = 0
28-
elseif (trim(prefer_gpu_mode_str) == "1") then ! ON
29-
prefer_gpu_mode = 1
30-
else ! default
31-
prefer_gpu_mode = 0
32-
endif
33-
34-
if (prefer_gpu_mode .eq. 1) then
21+
22+
if (nv_uvm_pref_gpu) then
3523
#:for arg in args
3624
!print*, "Moving ${arg}$ to GPU => ", SHAPE(${arg}$)
3725
! set preferred location GPU

src/simulation/m_checker.fpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ contains
3030

3131
if (igr) then
3232
call s_check_inputs_igr
33+
call s_check_inputs_nvidia_uvm
3334
else
3435
if (recon_type == WENO_TYPE) then
3536
call s_check_inputs_weno
@@ -411,4 +412,13 @@ contains
411412
@:PROHIBIT(powell .and. fd_order == dflt_int, "fd_order must be set if Powell's method is enabled")
412413
end subroutine s_check_inputs_mhd
413414
415+
impure subroutine s_check_inputs_nvidia_uvm
416+
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
417+
@:PROHIBIT(nv_uvm_igr_temps_on_gpu > 3 .or. nv_uvm_igr_temps_on_gpu < 0, &
418+
"nv_uvm_igr_temps_on_gpu must be in the range [0, 3]")
419+
@:PROHIBIT(nv_uvm_igr_temps_on_gpu == 3 .and. igr_iter_solver == 2, &
420+
"nv_uvm_igr_temps_on_gpu must be in the range [0, 2] for igr_iter_solver == 2")
421+
#endif
422+
end subroutine s_check_inputs_nvidia_uvm
423+
414424
end module m_checker

src/simulation/m_global_parameters.fpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,15 @@ module m_global_parameters
156156
logical :: viscous !< Viscous effects
157157
#:endif
158158

159+
!> @name Variables for out-of-core IGR computation on NVIDIA
160+
!> @{
161+
integer :: nv_uvm_igr_temps_on_gpu ! 0 => jac, jac_rhs, and jac_old on CPU
162+
! 1 => jac on GPU, jac_rhs and jac_old on CPU
163+
! 2 => jac and jac_rhs on GPU, jac_old on CPU
164+
! 3 => jac, jac_rhs, and jac_old on GPU (default)
165+
logical :: nv_uvm_pref_gpu ! Apply GPU preferred-location hints to UVM arrays via PREFER_GPU (default TRUE) -- NOTE(review): confirm wording against macros.fpp
166+
!> @}
167+
159168
real(wp) :: weno_eps !< Binding for the WENO nonlinear weights
160169
real(wp) :: teno_CT !< Smoothness threshold for TENO
161170
logical :: mp_weno !< Monotonicity preserving (MP) WENO
@@ -570,6 +579,10 @@ contains
570579
t_stop = dflt_real
571580
t_save = dflt_real
572581
582+
! NVIDIA UVM options
583+
nv_uvm_igr_temps_on_gpu = 3 ! => jac, jac_rhs, and jac_old on GPU (default)
584+
nv_uvm_pref_gpu = .true.
585+
573586
! Simulation algorithm parameters
574587
model_eqns = dflt_int
575588
mpp_lim = .false.

src/simulation/m_igr.fpp

Lines changed: 24 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ module m_igr
2525
s_finalize_igr_module
2626

2727
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
28-
integer, dimension(3) :: temp_on_gpu
28+
integer, dimension(3) :: nv_uvm_temp_on_gpu
2929
real(wp), pointer, contiguous, dimension(:, :, :) :: jac,jac_rhs,jac_old
30-
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host1
31-
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host2
32-
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host3
30+
real(wp), allocatable, dimension(:, :, :), pinned, target :: jac_host_pool
31+
real(wp), allocatable, dimension(:, :, :), pinned, target :: jac_rhs_host_pool
32+
real(wp), allocatable, dimension(:, :, :), pinned, target :: jac_old_host_pool
3333
#else
3434
real(wp), allocatable, dimension(:, :, :) :: jac, jac_rhs, jac_old
3535
$:GPU_DECLARE(create='[jac, jac_rhs, jac_old]')
@@ -81,7 +81,6 @@ module m_igr
8181
5._wp/6._wp, & ! Index 0
8282
2._wp/6._wp & ! Index 1
8383
]
84-
8584
#:endif
8685
#:endif
8786

@@ -90,29 +89,6 @@ module m_igr
9089
contains
9190

9291
subroutine s_initialize_igr_module()
93-
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
94-
integer :: igr_temps_on_gpu = 3
95-
character(len=10) :: igr_temps_on_gpu_str
96-
97-
call get_environment_variable("NVIDIA_IGR_TEMPS_ON_GPU", igr_temps_on_gpu_str)
98-
99-
if (trim(igr_temps_on_gpu_str) == "0") then
100-
igr_temps_on_gpu = 0 ! jac, jac_rhs and jac_old on CPU
101-
else if (trim(igr_temps_on_gpu_str) == "1") then
102-
igr_temps_on_gpu = 1 ! jac on GPU, jac_rhs on CPU, jac_old on CPU
103-
else if (trim(igr_temps_on_gpu_str) == "2") then
104-
igr_temps_on_gpu = 2 ! jac and jac_rhs on GPU, jac_old on CPU
105-
else if (trim(igr_temps_on_gpu_str) == "3") then
106-
igr_temps_on_gpu = 3 ! jac, jac_rhs and jac_old on GPU
107-
else ! default on GPU
108-
igr_temps_on_gpu = 3
109-
end if
110-
111-
! create map
112-
temp_on_gpu(1:3) = 0
113-
temp_on_gpu(1:igr_temps_on_gpu) = 1
114-
!print*, temp_on_gpu(1:3)
115-
#endif
11692

11793
if (viscous) then
11894
@:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
@@ -138,48 +114,47 @@ contains
138114
idwbuff(3)%beg:idwbuff(3)%end))
139115
end if
140116
#else
117+
! create map
118+
nv_uvm_temp_on_gpu(1:3) = 0
119+
nv_uvm_temp_on_gpu(1:nv_uvm_igr_temps_on_gpu) = 1
141120

142-
if ( temp_on_gpu(1) == 1 ) then
121+
if (nv_uvm_temp_on_gpu(1) == 1) then
143122
@:ALLOCATE(jac(idwbuff(1)%beg:idwbuff(1)%end, &
144123
idwbuff(2)%beg:idwbuff(2)%end, &
145124
idwbuff(3)%beg:idwbuff(3)%end))
146125
@:PREFER_GPU(jac)
147126
else
148-
!print*, 'jac on CPU'
149-
allocate(pool_host1(idwbuff(1)%beg:idwbuff(1)%end, &
127+
allocate(jac_host_pool(idwbuff(1)%beg:idwbuff(1)%end, &
150128
idwbuff(2)%beg:idwbuff(2)%end, &
151129
idwbuff(3)%beg:idwbuff(3)%end))
152130

153131
jac(idwbuff(1)%beg:idwbuff(1)%end, &
154132
idwbuff(2)%beg:idwbuff(2)%end, &
155-
idwbuff(3)%beg:idwbuff(3)%end) => pool_host1(:,:,:)
133+
idwbuff(3)%beg:idwbuff(3)%end) => jac_host_pool(:,:,:)
156134
end if
157135

158-
if ( temp_on_gpu(2) == 1 ) then
136+
if (nv_uvm_temp_on_gpu(2) == 1) then
159137
@:ALLOCATE(jac_rhs(-1:m,-1:n,-1:p))
160138
@:PREFER_GPU(jac_rhs)
161139
else
162-
!print*, 'jac_rhs on CPU'
163-
allocate(pool_host2(-1:m,-1:n,-1:p))
164-
165-
jac_rhs(-1:m,-1:n,-1:p) => pool_host2(:,:,:)
140+
allocate(jac_rhs_host_pool(-1:m,-1:n,-1:p))
141+
jac_rhs(-1:m,-1:n,-1:p) => jac_rhs_host_pool(:,:,:)
166142
end if
167143

168144
if (igr_iter_solver == 1) then ! Jacobi iteration
169-
if ( temp_on_gpu(3) == 1 ) then
145+
if (nv_uvm_temp_on_gpu(3) == 1) then
170146
@:ALLOCATE(jac_old(idwbuff(1)%beg:idwbuff(1)%end, &
171147
idwbuff(2)%beg:idwbuff(2)%end, &
172148
idwbuff(3)%beg:idwbuff(3)%end))
173149
@:PREFER_GPU(jac_old)
174150
else
175-
!print*, 'jac_old on CPU'
176-
allocate(pool_host3(idwbuff(1)%beg:idwbuff(1)%end, &
151+
allocate(jac_old_host_pool(idwbuff(1)%beg:idwbuff(1)%end, &
177152
idwbuff(2)%beg:idwbuff(2)%end, &
178153
idwbuff(3)%beg:idwbuff(3)%end))
179154

180155
jac_old(idwbuff(1)%beg:idwbuff(1)%end, &
181156
idwbuff(2)%beg:idwbuff(2)%end, &
182-
idwbuff(3)%beg:idwbuff(3)%end) => pool_host3(:,:,:)
157+
idwbuff(3)%beg:idwbuff(3)%end) => jac_old_host_pool(:,:,:)
183158
end if
184159
end if
185160
#endif
@@ -203,7 +178,7 @@ contains
203178

204179
#:if not MFC_CASE_OPTIMIZATION
205180
if (igr_order == 3) then
206-
vidxb = -1; vidxe = 2;
181+
vidxb = -1; vidxe = 2;
207182
$:GPU_UPDATE(device='[vidxb, vidxe]')
208183

209184
@:ALLOCATE(coeff_L(0:2))
@@ -219,7 +194,7 @@ contains
219194
$:GPU_UPDATE(device='[coeff_R]')
220195

221196
elseif (igr_order == 5) then
222-
vidxb = -2; vidxe = 3;
197+
vidxb = -2; vidxe = 3;
223198
$:GPU_UPDATE(device='[vidxb, vidxe]')
224199

225200
@:ALLOCATE(coeff_L(-1:3))
@@ -2699,26 +2674,26 @@ contains
26992674
@:DEALLOCATE(jac_old)
27002675
end if
27012676
#else
2702-
if (temp_on_gpu(1) == 1) then
2677+
if (nv_uvm_temp_on_gpu(1) == 1) then
27032678
@:DEALLOCATE(jac)
27042679
else
27052680
nullify(jac)
2706-
deallocate(pool_host1)
2681+
deallocate(jac_host_pool)
27072682
end if
27082683

2709-
if (temp_on_gpu(2) == 1) then
2684+
if (nv_uvm_temp_on_gpu(2) == 1) then
27102685
@:DEALLOCATE(jac_rhs)
27112686
else
27122687
nullify(jac_rhs)
2713-
deallocate(pool_host2)
2688+
deallocate(jac_rhs_host_pool)
27142689
end if
27152690

27162691
if (igr_iter_solver == 1) then ! Jacobi iteration
2717-
if (temp_on_gpu(3) == 1) then
2692+
if (nv_uvm_temp_on_gpu(3) == 1) then
27182693
@:DEALLOCATE(jac_old)
27192694
else
27202695
nullify(jac_old)
2721-
deallocate(pool_host3)
2696+
deallocate(jac_old_host_pool)
27222697
end if
27232698
end if
27242699
#endif

src/simulation/m_mpi_proxy.fpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,10 @@ contains
237237
#:endfor
238238
end do
239239
240+
! NVIDIA UVM variables
241+
call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
242+
call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
243+
240244
#endif
241245
242246
end subroutine s_mpi_bcast_user_inputs

src/simulation/m_start_up.fpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,10 @@ contains
185185
surface_tension, bubbles_lagrange, lag_params, &
186186
hyperelasticity, R0ref, num_bc_patches, Bx0, powell, &
187187
cont_damage, tau_star, cont_damage_s, alpha_bar, &
188-
alf_factor, num_igr_iters, &
189-
num_igr_warm_start_iters, &
190-
int_comp, ic_eps, ic_beta
188+
alf_factor, num_igr_iters, num_igr_warm_start_iters, &
189+
int_comp, ic_eps, ic_beta, nv_uvm_igr_temps_on_gpu, &
190+
nv_uvm_pref_gpu
191+
191192
! Checking that an input file has been provided by the user. If it
192193
! has, then the input file is read in, otherwise, simulation exits.
193194
inquire (FILE=trim(file_path), EXIST=file_exist)

0 commit comments

Comments
 (0)