Skip to content

Commit 5c01cf4

Browse files
committed
Fixed cylindrical
1 parent 2e37629 commit 5c01cf4

File tree

3 files changed

+82
-77
lines changed

3 files changed

+82
-77
lines changed

src/common/include/omp_macros.fpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -340,28 +340,4 @@
340340
$:code
341341
#:endif
342342
#:enddef
343-
344-
#:def DEF_AMD(code)
345-
#:if MFC_COMPILER == AMD_COMPILER_ID
346-
$:code
347-
#:endif
348-
#:enddef
349-
350-
#:def UNDEF_CCE(code)
351-
#:if MFC_COMPILER != CCE_COMPILER_ID
352-
$:code
353-
#:endif
354-
#:enddef
355-
356-
#:def DEF_CCE(code)
357-
#:if MFC_COMPILER == CCE_COMPILER_ID
358-
$:code
359-
#:endif
360-
#:enddef
361-
362-
#:def UNDEF_NVIDIA(code)
363-
#:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID
364-
$:code
365-
#:endif
366-
#:enddef
367343
! New line at end of file is required for FYPP

src/common/include/parallel_macros.fpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,31 @@
193193
#endif
194194

195195
#:enddef
196+
197+
#:def DEF_AMD(code)
198+
#:if MFC_COMPILER == AMD_COMPILER_ID
199+
$:code
200+
#:endif
201+
#:enddef
202+
203+
#:def UNDEF_CCE(code)
204+
#:if MFC_COMPILER != CCE_COMPILER_ID
205+
$:code
206+
#:endif
207+
#:enddef
208+
209+
#:def DEF_CCE(code)
210+
#:if MFC_COMPILER == CCE_COMPILER_ID
211+
$:code
212+
#:endif
213+
#:enddef
214+
215+
#:def UNDEF_NVIDIA(code)
216+
#:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID
217+
$:code
218+
#:endif
219+
#:enddef
220+
221+
#:set USING_NVHPC = (MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID)
222+
#:set USING_CCE = (MFC_COMPILER == CCE_COMPILER_ID)
196223
! New line at end of file is required for FYPP

src/simulation/m_fftw.fpp

Lines changed: 55 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -161,117 +161,119 @@ contains
161161
end do
162162
#:endcall GPU_PARALLEL_LOOP
163163

164+
#:if not USING_NVHPC
164165
p_real => data_real_gpu
165166
p_cmplx => data_cmplx_gpu
166167
p_fltr_cmplx => data_fltr_cmplx_gpu
168+
#:endif
167169

168170
#:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]')
169171
#:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx, p_fltr_cmplx]')
170172
#if defined(__PGI)
171-
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
173+
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
172174
#else
173175
ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
174176
call hipCheck(hipDeviceSynchronize())
175177
#endif
176178
#:endcall GPU_HOST_DATA
177-
Nfq = 3
178-
$:GPU_UPDATE(device='[Nfq]')
179+
Nfq = 3
180+
$:GPU_UPDATE(device='[Nfq]')
179181

180-
#:call GPU_PARALLEL_LOOP(collapse=3)
181-
do k = 1, sys_size
182-
do j = 0, m
183-
do l = 1, Nfq
184-
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
185-
end do
182+
#:call GPU_PARALLEL_LOOP(collapse=3)
183+
do k = 1, sys_size
184+
do j = 0, m
185+
do l = 1, Nfq
186+
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
186187
end do
187188
end do
188-
#:endcall GPU_PARALLEL_LOOP
189+
end do
190+
#:endcall GPU_PARALLEL_LOOP
189191

190192
#:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]')
191193
#if defined(__PGI)
192-
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
194+
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
193195
#else
194196
ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
195197
call hipCheck(hipDeviceSynchronize())
196198
#endif
197199
#:endcall GPU_HOST_DATA
198200

201+
#:call GPU_PARALLEL_LOOP(collapse=3)
202+
do k = 1, sys_size
203+
do j = 0, m
204+
do l = 0, p
205+
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
206+
q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
207+
end do
208+
end do
209+
end do
210+
#:endcall GPU_PARALLEL_LOOP
211+
212+
do i = 1, fourier_rings
213+
199214
#:call GPU_PARALLEL_LOOP(collapse=3)
200215
do k = 1, sys_size
201216
do j = 0, m
202-
do l = 0, p
203-
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
204-
q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
217+
do l = 1, cmplx_size
218+
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
205219
end do
206220
end do
207221
end do
208222
#:endcall GPU_PARALLEL_LOOP
209223

210-
do i = 1, fourier_rings
211-
212-
#:call GPU_PARALLEL_LOOP(collapse=3)
213-
do k = 1, sys_size
214-
do j = 0, m
215-
do l = 1, cmplx_size
216-
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp)
217-
end do
218-
end do
219-
end do
220-
#:endcall GPU_PARALLEL_LOOP
221-
222-
#:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
223-
do k = 1, sys_size
224-
do j = 0, m
225-
do l = 0, p
226-
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
227-
end do
224+
#:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
225+
do k = 1, sys_size
226+
do j = 0, m
227+
do l = 0, p
228+
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l)
228229
end do
229230
end do
230-
#:endcall GPU_PARALLEL_LOOP
231+
end do
232+
#:endcall GPU_PARALLEL_LOOP
231233

232234
#:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]')
233235
#if defined(__PGI)
234-
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
236+
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
235237
#else
236238
ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
237239
call hipCheck(hipDeviceSynchronize())
238240
#endif
239241
#:endcall GPU_HOST_DATA
240242

241-
Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
242-
$:GPU_UPDATE(device='[Nfq]')
243+
Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
244+
$:GPU_UPDATE(device='[Nfq]')
243245

244-
#:call GPU_PARALLEL_LOOP(collapse=3)
245-
do k = 1, sys_size
246-
do j = 0, m
247-
do l = 1, Nfq
248-
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
249-
end do
246+
#:call GPU_PARALLEL_LOOP(collapse=3)
247+
do k = 1, sys_size
248+
do j = 0, m
249+
do l = 1, Nfq
250+
data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size)
250251
end do
251252
end do
252-
#:endcall GPU_PARALLEL_LOOP
253+
end do
254+
#:endcall GPU_PARALLEL_LOOP
253255

254256
#:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]')
255257
#if defined(__PGI)
256-
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
258+
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
257259
#else
258260
ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
259261
call hipCheck(hipDeviceSynchronize())
260262
#endif
261263
#:endcall GPU_HOST_DATA
262264

263-
#:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
264-
do k = 1, sys_size
265-
do j = 0, m
266-
do l = 0, p
267-
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
268-
q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
269-
end do
265+
#:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
266+
do k = 1, sys_size
267+
do j = 0, m
268+
do l = 0, p
269+
data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp)
270+
q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)
270271
end do
271272
end do
272-
#:endcall GPU_PARALLEL_LOOP
273+
end do
274+
#:endcall GPU_PARALLEL_LOOP
273275

274-
end do
276+
end do
275277
#:endcall GPU_DATA
276278

277279
#else

0 commit comments

Comments
 (0)