Skip to content

Commit 41c8280

Browse files
committed
Combined end directives with the beginning directive
1 parent 62cac03 commit 41c8280

File tree

3 files changed

+36
-21
lines changed

3 files changed

+36
-21
lines changed

src/common/include/parallel_macros.fpp

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@
132132

133133
#:def GEN_COLLAPSE_STR(collapse)
134134
#:if collapse is not None
135+
#:set collapse = int(collapse)
135136
#:assert isinstance(collapse, int)
136137
#:assert collapse > 1
137138
#:set collapse_val = 'collapse(' + str(collapse) + ') '
@@ -327,8 +328,13 @@
327328
$:acc_directive
328329
#:enddef
329330

330-
#:def GPU_DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None)
331-
331+
#:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None)
332+
#:assert code is not None
333+
#:assert isinstance(code, str)
334+
#:if code == '' or code.isspace()
335+
#:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code'
336+
#:endif
337+
332338
#:set copy_val = GEN_COPY_STR(copy)
333339

334340
#:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n')
@@ -355,21 +361,27 @@
355361
& deviceptr_val.strip('\n') + attach_val.strip('\n') + &
356362
& default_val.strip('\n')
357363
#:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n')
364+
#:set end_acc_directive = '!$acc end data'
358365
$:acc_directive
366+
$:code
367+
$:end_acc_directive
359368
#:enddef
360369

361-
#:def GPU_HOST_DATA(use_device=None, extraAccArgs=None)
370+
#:def GPU_HOST_DATA(code, use_device=None, extraAccArgs=None)
371+
#:assert code is not None
372+
#:assert isinstance(code, str)
373+
#:if code == '' or code.isspace()
374+
#:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any code'
375+
#:endif
362376
#:set use_device_val = GEN_USE_DEVICE_STR(use_device)
363377
#:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs)
364378

365379
#:set clause_val = use_device_val.strip('\n')
366380
#:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n')
381+
#:set end_acc_directive = '!$acc end host_data'
367382
$:acc_directive
368-
#:enddef
369-
370-
#:def GPU_END_HOST_DATA()
371-
#:set acc_directive = '!$acc end host_data'
372-
$:acc_directive
383+
$:code
384+
$:end_acc_directive
373385
#:enddef
374386

375387
#:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None)

src/simulation/m_fftw.fpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,15 +161,15 @@ contains
161161
p_cmplx => data_cmplx_gpu
162162
p_fltr_cmplx => data_fltr_cmplx_gpu
163163

164-
$:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]')
165-
$:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]')
164+
#:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]')
165+
#:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]')
166166
#if defined(__PGI)
167167
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
168168
#else
169169
ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
170170
call hipCheck(hipDeviceSynchronize())
171171
#endif
172-
$:GPU_END_HOST_DATA()
172+
#:endcall GPU_HOST_DATA
173173
Nfq = 3
174174
$:GPU_UPDATE(device='[Nfq]')
175175

@@ -182,14 +182,14 @@ contains
182182
end do
183183
end do
184184

185-
$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
185+
#:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
186186
#if defined(__PGI)
187187
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
188188
#else
189189
ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
190190
call hipCheck(hipDeviceSynchronize())
191191
#endif
192-
$:GPU_END_HOST_DATA()
192+
#:endcall GPU_HOST_DATA
193193

194194
$:GPU_PARALLEL_LOOP(collapse=3)
195195
do k = 1, sys_size
@@ -221,14 +221,14 @@ contains
221221
end do
222222
end do
223223

224-
$:GPU_HOST_DATA(use_device='[p_real, p_cmplx]')
224+
#:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]')
225225
#if defined(__PGI)
226226
ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
227227
#else
228228
ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx))
229229
call hipCheck(hipDeviceSynchronize())
230230
#endif
231-
$:GPU_END_HOST_DATA()
231+
#:endcall GPU_HOST_DATA
232232

233233
Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size)
234234
$:GPU_UPDATE(device='[Nfq]')
@@ -242,14 +242,14 @@ contains
242242
end do
243243
end do
244244

245-
$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
245+
#:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]')
246246
#if defined(__PGI)
247247
ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
248248
#else
249249
ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real))
250250
call hipCheck(hipDeviceSynchronize())
251251
#endif
252-
$:GPU_END_HOST_DATA()
252+
#:endcall GPU_HOST_DATA
253253

254254
$:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]')
255255
do k = 1, sys_size
@@ -293,7 +293,7 @@ contains
293293
end do
294294
end do
295295
#endif
296-
!$acc end data
296+
#:endcall GPU_DATA
297297
end subroutine s_apply_fourier_filter
298298

299299
!> The purpose of this subroutine is to destroy the fftw plan

src/simulation/m_weno.fpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,15 +1148,17 @@ contains
11481148
block
11491149
use CuTensorEx
11501150

1151-
$:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
1151+
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
11521152
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4])
1153+
#:endcall GPU_HOST_DATA
11531154
end block
11541155
else
11551156
block
11561157
use CuTensorEx
11571158

1158-
$:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
1159+
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]')
11591160
v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4])
1161+
#:endcall GPU_HOST_DATA
11601162
end block
11611163
end if
11621164
else
@@ -1184,8 +1186,9 @@ contains
11841186
block
11851187
use CuTensorEx
11861188

1187-
$:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
1189+
#:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]')
11881190
v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4])
1191+
#:endcall
11891192
end block
11901193
else
11911194
#endif

0 commit comments

Comments
 (0)