Skip to content

Commit ed2bc5e

Browse files
Correct line markers have been recovered
1 parent 7982c58 commit ed2bc5e

File tree

4 files changed

+114
-2
lines changed

4 files changed

+114
-2
lines changed

src/common/include/acc_macros.fpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,35 @@
161161
$:acc_end_directive
162162
#:enddef
163163

164+
#:def NEW_ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
#! Emits a single opening '!$acc parallel loop' directive line.
#! Unlike a block-style macro it does not wrap any code and does not emit
#! a matching end directive; the caller writes the loop right after it.
#!
#! Each argument is handed to the corresponding GEN_*_STR helper (defined
#! outside this macro). The fragments are collected in clause order, newlines
#! surrounding each fragment are removed, and everything is concatenated
#! behind the directive prefix. extraAccArgs is appended last.
#:set clause_parts = [GEN_COLLAPSE_STR(collapse), &
& GEN_PARALLELISM_STR(parallelism), &
& GEN_DEFAULT_STR(default), &
& GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n'), &
& GEN_REDUCTION_STR(reduction, reductionOp), &
& GEN_COPY_STR(copy), &
& GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n'), &
& GEN_COPYOUT_STR(copyout), &
& GEN_CREATE_STR(create), &
& GEN_NOCREATE_STR(no_create), &
& GEN_PRESENT_STR(present), &
& GEN_DEVICEPTR_STR(deviceptr), &
& GEN_ATTACH_STR(attach)]
#:set extra_clause = GEN_EXTRA_ARGS_STR(extraAccArgs).strip('\n')
#:set clause_text = ''.join([part.strip('\n') for part in clause_parts])
#:set acc_directive = '!$acc parallel loop ' + clause_text + extra_clause
$:acc_directive
#:enddef
192+
164193
#:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None)
165194
#:set parallelism_val = GEN_PARALLELISM_STR(parallelism)
166195
#:assert isinstance(nohost, bool)

src/common/include/omp_macros.fpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,62 @@
196196
$:omp_end_directive
197197
#:enddef
198198

199+
#:def NEW_OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None)

#! Emits a single opening OpenMP target-offload loop directive line.
#! It does not wrap any code; END_OMP_PARALLEL_LOOP produces the matching
#! end directive.
#!
#! Clause fragments come from the GEN_*/OMP_* helpers (defined outside this
#! macro). They are collected in clause order, surrounding newlines are
#! removed from each, and the result is appended to a compiler-specific
#! directive prefix. 'attach' is passed to OMP_MAP_STR with the
#! 'always,tofrom' map type. extraOmpArgs is appended last.
#:set clause_parts = [GEN_COLLAPSE_STR(collapse), &
& OMP_PARALLELISM_STR(parallelism), &
& OMP_DEFAULT_STR(default), &
& GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n'), &
& GEN_REDUCTION_STR(reduction, reductionOp), &
& OMP_COPY_STR(copy), &
& OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n'), &
& OMP_COPYOUT_STR(copyout), &
& OMP_CREATE_STR(create), &
& OMP_NOCREATE_STR(no_create), &
& OMP_PRESENT_STR(present), &
& OMP_DEVICEPTR_STR(deviceptr), &
& OMP_MAP_STR('always,tofrom', attach)]
#:set extra_clause = GEN_EXTRA_ARGS_STR(extraOmpArgs).strip('\n')
#:set clause_text = ''.join([part.strip('\n') for part in clause_parts])
#! Hardcoding the parallelism for now: the construct is chosen by compiler only.

#! NVIDIA/PGI and any unrecognised compiler share the 'teams loop' form;
#! CCE and AMD override it below.
#:set is_nvidia_family = MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:if not is_nvidia_family
#:if MFC_COMPILER == CCE_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do '
#:endif
#:endif

#:set omp_directive = omp_start_directive + clause_text + extra_clause
$:omp_directive
#:enddef
239+
240+
#:def END_OMP_PARALLEL_LOOP()

#! Emits the '!$omp end ...' directive that closes the construct opened by
#! NEW_OMP_PARALLEL_LOOP. The construct name is selected from MFC_COMPILER
#! with the same compiler mapping used for the opening directive.
#! NVIDIA/PGI and any unrecognised compiler use 'target teams loop'.
#:set is_nvidia_family = MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:set construct_name = 'target teams loop'
#:if not is_nvidia_family
#:if MFC_COMPILER == CCE_COMPILER_ID
#:set construct_name = 'target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set construct_name = 'target teams distribute parallel do'
#:endif
#:endif
#:set omp_end_directive = '!$omp end ' + construct_name

$:omp_end_directive
#:enddef
254+
199255
#:def OMP_ROUTINE(function_name, nohost, extraOmpArgs)
200256
#:assert isinstance(nohost, bool)
201257
#:if nohost == True

src/common/include/parallel_macros.fpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,33 @@
3636
#endif
3737
#:enddef
3838

39+
#:def NEW_GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None)
#! Emits the opening directive of a GPU parallel loop for whichever backend
#! is enabled at C-preprocessor time (MFC_OpenACC or MFC_OpenMP). The caller
#! writes the loop itself right after the macro call; END_GPU_PARALLEL_LOOP
#! produces the matching end directive.
#!
#! All arguments are forwarded unchanged to NEW_ACC_PARALLEL_LOOP and
#! NEW_OMP_PARALLEL_LOOP. extraAccArgs only reaches the OpenACC variant and
#! extraOmpArgs only reaches the OpenMP variant.
#!
#! Fypp does not interpret the '#if'/'#elif'/'#endif' lines below; it copies
#! them to its output as plain text for the C preprocessor. A '#:set' placed
#! between them therefore always executes. For that reason both directive
#! strings are built up front under distinct names, and only the '$:' output
#! lines sit inside the preprocessor branches (the same layout
#! END_GPU_PARALLEL_LOOP uses).
#:set acc_directive = NEW_ACC_PARALLEL_LOOP(collapse, private, parallelism, default, &
& firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, &
& create, no_create, present, deviceptr, attach, extraAccArgs)
#:set omp_directive = NEW_OMP_PARALLEL_LOOP(collapse, private, parallelism, default, &
& firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, &
& create, no_create, present, deviceptr, attach, extraOmpArgs)

#if defined(MFC_OpenACC)
$:acc_directive
#elif defined(MFC_OpenMP)
$:omp_directive
#endif

#:enddef
53+
54+
#:def END_GPU_PARALLEL_LOOP()
#! Emits the directive that closes a loop opened with NEW_GPU_PARALLEL_LOOP:
#! '!$acc end parallel loop' when MFC_OpenACC is defined, or the
#! compiler-specific OpenMP end directive when MFC_OpenMP is defined.
#!
#! Both strings are built before the preprocessor branches because Fypp
#! passes the '#if' lines through as plain text and would run any '#:set'
#! placed between them unconditionally.

#:set acc_end_directive = '!$acc end parallel loop'
#! END_OMP_PARALLEL_LOOP is declared without parameters, and this macro has
#! no clause arguments of its own to forward, so it is called with none.
#:set omp_end_directive = END_OMP_PARALLEL_LOOP()

#if defined(MFC_OpenACC)
$:acc_end_directive
#elif defined(MFC_OpenMP)
$:omp_end_directive
#endif
#:enddef
65+
3966
#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None)
4067
#:assert isinstance(cray_inline, bool)
4168
#:set acc_directive = ACC_ROUTINE(function_name=function_name, parallelism=parallelism, nohost=nohost, extraAccArgs=extraAccArgs)

src/simulation/m_ibm.fpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ contains
197197
type(ghost_point) :: gp
198198
type(ghost_point) :: innerp
199199
if (num_gps > 0) then
200-
#:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]')
200+
$:NEW_GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]')
201201
do i = 1, num_gps
202202

203203
gp = ghost_points(i)
@@ -365,7 +365,7 @@ contains
365365
end do
366366
end if
367367
end do
368-
#:endcall GPU_PARALLEL_LOOP
368+
! $:END_GPU_PARALLEL_LOOP
369369
end if
370370
371371
!Correct the state of the inner points in IBs

0 commit comments

Comments
 (0)