Skip to content

Commit ee1277d

Browse files
committed
Allow control in placement of IGR temps
1 parent 7054b7b commit ee1277d

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

src/simulation/m_igr.fpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,16 @@ module m_igr
2424
s_igr_flux_add, &
2525
s_finalize_igr_module
2626

27+
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
28+
integer, dimension(3) :: temp_on_gpu
29+
real(wp), pointer, contiguous, dimension(:, :, :) :: jac,jac_rhs,jac_old
30+
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host1
31+
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host2
32+
real(wp), allocatable, dimension(:, :, :), pinned, target :: pool_host3
33+
#else
2734
real(wp), allocatable, dimension(:, :, :) :: jac, jac_rhs, jac_old
2835
$:GPU_DECLARE(create='[jac, jac_rhs, jac_old]')
36+
#endif
2937

3038
real(wp), allocatable, dimension(:, :) :: Res
3139
$:GPU_DECLARE(create='[Res]')
@@ -82,6 +90,47 @@ module m_igr
8290
contains
8391

8492
subroutine s_initialize_igr_module()
93+
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
94+
integer :: igr_temps_total
95+
integer :: igr_temps_on_gpu
96+
integer :: igr_temps_on_cpu
97+
character(len=10) :: igr_temps_on_gpu_str
98+
99+
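! Count the temporaries this solver needs (3 for Jacobi iteration, 2 otherwise)
! and default to placing all of them on the GPU; the environment variable read
! below may lower that number.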
100+
if (igr_iter_solver == 1) then ! Jacobi iteration
101+
igr_temps_total = 3
102+
else
103+
igr_temps_total = 2
104+
end if
105+
igr_temps_on_gpu = igr_temps_total
106+
igr_temps_on_cpu = 0
107+
108+
call get_environment_variable("NVIDIA_IGR_TEMPS_ON_GPU", igr_temps_on_gpu_str)
109+
110+
if (trim(igr_temps_on_gpu_str) == "0") then
111+
igr_temps_on_gpu = 0 ! jac, jac_rhs and jac_old on CPU
112+
else if (trim(igr_temps_on_gpu_str) == "1") then
113+
igr_temps_on_gpu = 1 ! jac on GPU, jac_rhs on CPU, jac_old on CPU
114+
else if (trim(igr_temps_on_gpu_str) == "2") then
115+
igr_temps_on_gpu = 2 ! jac and jac_rhs on GPU, jac_old on CPU
116+
else if (trim(igr_temps_on_gpu_str) == "3") then
117+
igr_temps_on_gpu = 3 ! jac, jac_rhs and jac_old on GPU
118+
else ! default on GPU
119+
igr_temps_on_gpu = 3
120+
end if
121+
122+
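! Clamp the requested GPU count to the number of temporaries actually in use,
! then assign whatever remains to the CPU.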
123+
if ( igr_temps_on_gpu > igr_temps_total ) then
124+
igr_temps_on_gpu = igr_temps_total
125+
end if
126+
igr_temps_on_cpu = igr_temps_total - igr_temps_on_gpu
127+
128+
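! Build the placement map, one entry per temporary (1 = jac, 2 = jac_rhs,
! 3 = jac_old): -1 = temporary not used, 0 = placed on the CPU, 1 = placed on the GPU.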
129+
temp_on_gpu(1:3) = -1
130+
temp_on_gpu(1:igr_temps_total) = 0
131+
temp_on_gpu(1:igr_temps_on_gpu) = 1
132+
print*, temp_on_gpu(1:3)
133+
#endif
85134

86135
if (viscous) then
87136
@:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
@@ -95,6 +144,7 @@ contains
95144
@:PREFER_GPU(Re_idx)
96145
end if
97146

147+
#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
98148
@:ALLOCATE(jac(idwbuff(1)%beg:idwbuff(1)%end, &
99149
idwbuff(2)%beg:idwbuff(2)%end, &
100150
idwbuff(3)%beg:idwbuff(3)%end))
@@ -109,6 +159,55 @@ contains
109159
idwbuff(3)%beg:idwbuff(3)%end))
110160
@:PREFER_GPU(jac_old)
111161
end if
162+
#else
163+
164+
if ( temp_on_gpu(1) == 1 ) then
165+
@:ALLOCATE(jac(idwbuff(1)%beg:idwbuff(1)%end, &
166+
idwbuff(2)%beg:idwbuff(2)%end, &
167+
idwbuff(3)%beg:idwbuff(3)%end))
168+
@:PREFER_GPU(jac)
169+
else
170+
print*, 'jac on CPU'
171+
172+
allocate(pool_host1(idwbuff(1)%beg:idwbuff(1)%end, &
173+
idwbuff(2)%beg:idwbuff(2)%end, &
174+
idwbuff(3)%beg:idwbuff(3)%end))
175+
176+
jac(idwbuff(1)%beg:idwbuff(1)%end, &
177+
idwbuff(2)%beg:idwbuff(2)%end, &
178+
idwbuff(3)%beg:idwbuff(3)%end) => pool_host1(:,:,:)
179+
end if
180+
181+
if ( temp_on_gpu(2) == 1 ) then
182+
@:ALLOCATE(jac_rhs(-1:m,-1:n,-1:p))
183+
@:PREFER_GPU(jac_rhs)
184+
else
185+
print*, 'jac_rhs on CPU'
186+
187+
allocate(pool_host2(-1:m,-1:n,-1:p))
188+
189+
jac_rhs(-1:m,-1:n,-1:p) => pool_host2(:,:,:)
190+
end if
191+
192+
if (igr_iter_solver == 1) then ! Jacobi iteration
193+
if ( temp_on_gpu(3) == 1 ) then
194+
@:ALLOCATE(jac_old(idwbuff(1)%beg:idwbuff(1)%end, &
195+
idwbuff(2)%beg:idwbuff(2)%end, &
196+
idwbuff(3)%beg:idwbuff(3)%end))
197+
@:PREFER_GPU(jac_old)
198+
else
199+
print*, 'jac_old on CPU'
200+
201+
allocate(pool_host3(idwbuff(1)%beg:idwbuff(1)%end, &
202+
idwbuff(2)%beg:idwbuff(2)%end, &
203+
idwbuff(3)%beg:idwbuff(3)%end))
204+
205+
jac_old(idwbuff(1)%beg:idwbuff(1)%end, &
206+
idwbuff(2)%beg:idwbuff(2)%end, &
207+
idwbuff(3)%beg:idwbuff(3)%end) => pool_host3(:,:,:)
208+
end if
209+
end if
210+
#endif
112211

113212
$:GPU_PARALLEL_LOOP(collapse=3)
114213
do l = idwbuff(3)%beg, idwbuff(3)%end
@@ -2618,11 +2717,36 @@ contains
26182717
@:DEALLOCATE(Res)
26192718
end if
26202719

2720+
#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
26212721
@:DEALLOCATE(jac, jac_rhs)
26222722

26232723
if (igr_iter_solver == 1) then ! Jacobi iteration
26242724
@:DEALLOCATE(jac_old)
26252725
end if
2726+
#else
2727+
if (temp_on_gpu(1) == 1) then
2728+
@:DEALLOCATE(jac)
2729+
else
2730+
nullify(jac)
2731+
deallocate(pool_host1)
2732+
end if
2733+
2734+
if (temp_on_gpu(2) == 1) then
2735+
@:DEALLOCATE(jac_rhs)
2736+
else
2737+
nullify(jac_rhs)
2738+
deallocate(pool_host2)
2739+
end if
2740+
2741+
if (igr_iter_solver == 1) then ! Jacobi iteration
2742+
if (temp_on_gpu(3) == 1) then
2743+
@:DEALLOCATE(jac_old)
2744+
else
2745+
nullify(jac_old)
2746+
deallocate(pool_host3)
2747+
end if
2748+
end if
2749+
#endif
26262750

26272751
#:if not MFC_CASE_OPTIMIZATION
26282752
@:DEALLOCATE(coeff_L, coeff_R)

0 commit comments

Comments
 (0)