@@ -24,8 +24,16 @@ module m_igr
2424 s_igr_flux_add, &
2525 s_finalize_igr_module
2626
! IGR work arrays jac, jac_rhs and jac_old.
! Two storage layouts are compiled depending on __NVCOMPILER_GPU_UNIFIED_MEM.
27+ #ifdef __NVCOMPILER_GPU_UNIFIED_MEM
! Unified-memory layout.
! temp_on_gpu is a per-array placement map indexed 1 = jac, 2 = jac_rhs, 3 = jac_old.
! s_initialize_igr_module fills it with 1 (array allocated through @:ALLOCATE and
! marked with @:PREFER_GPU), 0 (array backed by a pool_host* buffer) or -1 (array
! not used by the selected iterative solver).
28+ integer , dimension (3 ) :: temp_on_gpu
! The work arrays are contiguous pointers so that each one can either be allocated
! directly or be associated with one of the host pools declared below.
29+ real (wp), pointer, contiguous, dimension (:, :, :) :: jac,jac_rhs,jac_old
! Host backing storage for jac (pool_host1), jac_rhs (pool_host2) and
! jac_old (pool_host3). Each pool is allocated only when the matching
! temp_on_gpu entry is not 1.
! NOTE(review): `pinned` is a vendor (CUDA Fortran) attribute, presumably
! requesting page-locked host memory - confirm against the compiler manual.
30+ real (wp), allocatable, dimension (:, :, :), pinned, target :: pool_host1
31+ real (wp), allocatable, dimension (:, :, :), pinned, target :: pool_host2
32+ real (wp), allocatable, dimension (:, :, :), pinned, target :: pool_host3
33+ #else
! Default layout: plain allocatable arrays registered through GPU_DECLARE(create=...).
2734 real (wp), allocatable, dimension (:, :, :) :: jac, jac_rhs, jac_old
2835 $:GPU_DECLARE(create= ' [jac, jac_rhs, jac_old]' )
36+ #endif
2937
3038 real (wp), allocatable, dimension (:, :) :: Res
3139 $:GPU_DECLARE(create= ' [Res]' )
@@ -82,6 +90,47 @@ module m_igr
8290contains
8391
8492 subroutine s_initialize_igr_module ()
#ifdef __NVCOMPILER_GPU_UNIFIED_MEM
        ! Unified-memory builds only: decide how many of the IGR work arrays are
        ! kept on the GPU. Arrays are prioritised in the order jac, jac_rhs,
        ! jac_old; the remaining ones are backed by host pools further below.
        integer :: igr_temps_total  !< Number of work arrays the selected solver needs
        integer :: igr_temps_on_gpu !< Number of work arrays requested on the GPU
        character(len=10) :: igr_temps_on_gpu_str !< Raw value of the environment override

        ! Jacobi iteration needs jac_old in addition to jac and jac_rhs
        if (igr_iter_solver == 1) then
            igr_temps_total = 3
        else
            igr_temps_total = 2
        end if

        ! Optional override. The value is returned as blanks when the variable is
        ! not set, which falls through to the default case below.
        call get_environment_variable("NVIDIA_IGR_TEMPS_ON_GPU", igr_temps_on_gpu_str)

        select case (trim(igr_temps_on_gpu_str))
        case ("0") ! jac, jac_rhs and jac_old on CPU
            igr_temps_on_gpu = 0
        case ("1") ! jac on GPU, jac_rhs and jac_old on CPU
            igr_temps_on_gpu = 1
        case ("2") ! jac and jac_rhs on GPU, jac_old on CPU
            igr_temps_on_gpu = 2
        case default ! "3", unset or unrecognised: everything on GPU
            igr_temps_on_gpu = igr_temps_total
        end select

        ! Never request more arrays than the solver actually uses
        igr_temps_on_gpu = min(igr_temps_on_gpu, igr_temps_total)

        ! Placement map: -1 = array unused, 0 = host pool, 1 = GPU.
        ! The last assignment is a zero-sized section (a no-op) when nothing
        ! is placed on the GPU.
        temp_on_gpu(:) = -1
        temp_on_gpu(1:igr_temps_total) = 0
        temp_on_gpu(1:igr_temps_on_gpu) = 1
#endif
85134
86135 if (viscous) then
87136 @:ALLOCATE(Res(1 :2 , 1 :maxval (Re_size)))
@@ -95,6 +144,7 @@ contains
95144 @:PREFER_GPU(Re_idx)
96145 end if
97146
147+ #ifndef __NVCOMPILER_GPU_UNIFIED_MEM
98148 @:ALLOCATE(jac(idwbuff(1 )%beg:idwbuff(1 )%end, &
99149 idwbuff(2 )%beg:idwbuff(2 )%end, &
100150 idwbuff(3 )%beg:idwbuff(3 )%end))
@@ -109,6 +159,55 @@ contains
109159 idwbuff(3 )%beg:idwbuff(3 )%end))
110160 @:PREFER_GPU(jac_old)
111161 end if
#else
        ! Unified-memory build: each work array is reached through a pointer.
        ! An entry of 1 in temp_on_gpu allocates the array directly and marks it
        ! with PREFER_GPU; any other value allocates the matching host pool and
        ! associates the pointer with it.

        ! jac
        if (temp_on_gpu(1) /= 1) then
            print *, ' jac on CPU'

            allocate (pool_host1(idwbuff(1)%beg:idwbuff(1)%end, &
                                 idwbuff(2)%beg:idwbuff(2)%end, &
                                 idwbuff(3)%beg:idwbuff(3)%end))

            ! Whole-array target, so jac takes over the bounds of pool_host1
            jac => pool_host1
        else
            @:ALLOCATE(jac(idwbuff(1)%beg:idwbuff(1)%end, &
                idwbuff(2)%beg:idwbuff(2)%end, &
                idwbuff(3)%beg:idwbuff(3)%end))
            @:PREFER_GPU(jac)
        end if

        ! jac_rhs
        if (temp_on_gpu(2) /= 1) then
            print *, ' jac_rhs on CPU'

            allocate (pool_host2(-1:m, -1:n, -1:p))

            ! Whole-array target, so jac_rhs takes over the bounds of pool_host2
            jac_rhs => pool_host2
        else
            @:ALLOCATE(jac_rhs(-1:m, -1:n, -1:p))
            @:PREFER_GPU(jac_rhs)
        end if

        ! jac_old is only needed by the Jacobi iteration
        if (igr_iter_solver == 1) then
            if (temp_on_gpu(3) /= 1) then
                print *, ' jac_old on CPU'

                allocate (pool_host3(idwbuff(1)%beg:idwbuff(1)%end, &
                                     idwbuff(2)%beg:idwbuff(2)%end, &
                                     idwbuff(3)%beg:idwbuff(3)%end))

                ! Whole-array target, so jac_old takes over the bounds of pool_host3
                jac_old => pool_host3
            else
                @:ALLOCATE(jac_old(idwbuff(1)%beg:idwbuff(1)%end, &
                    idwbuff(2)%beg:idwbuff(2)%end, &
                    idwbuff(3)%beg:idwbuff(3)%end))
                @:PREFER_GPU(jac_old)
            end if
        end if
#endif
112211
113212 $:GPU_PARALLEL_LOOP(collapse= 3 )
114213 do l = idwbuff(3 )%beg, idwbuff(3 )%end
@@ -2618,11 +2717,36 @@ contains
26182717 @:DEALLOCATE(Res)
26192718 end if
26202719
#ifndef __NVCOMPILER_GPU_UNIFIED_MEM
        ! Default build: the work arrays are plain allocatables
        @:DEALLOCATE(jac, jac_rhs)

        if (igr_iter_solver == 1) then ! Jacobi iteration
            @:DEALLOCATE(jac_old)
        end if
#else
        ! Unified-memory build: release each work array the same way it was
        ! obtained. A host-backed array is first disassociated from its pool and
        ! then the pool itself is freed; otherwise the pointer target is released
        ! through DEALLOCATE.

        ! jac
        if (temp_on_gpu(1) /= 1) then
            nullify (jac)
            deallocate (pool_host1)
        else
            @:DEALLOCATE(jac)
        end if

        ! jac_rhs
        if (temp_on_gpu(2) /= 1) then
            nullify (jac_rhs)
            deallocate (pool_host2)
        else
            @:DEALLOCATE(jac_rhs)
        end if

        ! jac_old exists only for the Jacobi iteration
        if (igr_iter_solver == 1) then
            if (temp_on_gpu(3) /= 1) then
                nullify (jac_old)
                deallocate (pool_host3)
            else
                @:DEALLOCATE(jac_old)
            end if
        end if
#endif
26262750
26272751 #:if not MFC_CASE_OPTIMIZATION
26282752 @:DEALLOCATE(coeff_L, coeff_R)
0 commit comments