Skip to content

Commit e28423a

Browse files
author
Tanush Prathi
committed
Add a non-parameter copy of molecular_weights (molecular_weights_nonparameter) so that a few more AMD kernels compile; make the attach/detach clauses "always" maps
1 parent cc8cd81 commit e28423a

File tree

4 files changed

+57
-18
lines changed

4 files changed

+57
-18
lines changed

src/common/include/omp_macros.fpp

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,15 @@
8484
$:deviceptr_val
8585
#:enddef
8686

87-
#:def OMP_ATTACH_STR(attach)
88-
#! #:if attach is not None
89-
#! #:stop 'attach is not supported yet'
90-
#! #:endif
91-
#:set attach_val = ''
92-
$:attach_val
93-
#:enddef
87+
#! #:def OMP_ATTACH_STR(attach)
88+
#! #:set attach_val = OMP_MAP_STR('always,to', attach)
89+
#! $:attach_val
90+
#! #:enddef
91+
92+
#! #:def OMP_DETACH_STR(detach)
93+
#! #:set detach_val = OMP_MAP_STR('always,from', detach)
94+
#! $:detach_val
95+
#! #:enddef
9496

9597
#:def OMP_TO_STR(to)
9698
#:set to_val = GEN_PARENTHESES_CLAUSE('to', to)
@@ -130,7 +132,7 @@
130132
#:set no_create_val = OMP_NOCREATE_STR(no_create)
131133
#:set present_val = OMP_PRESENT_STR(present)
132134
#:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr)
133-
#:set attach_val = OMP_MAP_STR('tofrom', attach)
135+
#:set attach_val = OMP_MAP_STR('always,tofrom', attach)
134136
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
135137
#:set omp_clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
136138
& copy_val.strip('\n') + copyin_val.strip('\n') + &
@@ -164,7 +166,7 @@
164166
#:set no_create_val = OMP_NOCREATE_STR(no_create)
165167
#:set present_val = OMP_PRESENT_STR(present)
166168
#:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr)
167-
#:set attach_val = OMP_MAP_STR('tofrom', attach)
169+
#:set attach_val = OMP_MAP_STR('always,tofrom', attach)
168170
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
169171
#:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + &
170172
& default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + &
@@ -255,7 +257,7 @@
255257
#:set no_create_val = OMP_NOCREATE_STR(no_create)
256258
#:set present_val = OMP_PRESENT_STR(present)
257259
#:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr)
258-
#:set attach_val = OMP_MAP_STR('tofrom', attach)
260+
#:set attach_val = OMP_MAP_STR('always,tofrom', attach)
259261
#:set default_val = OMP_DEFAULT_STR(default)
260262
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
261263
#:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + &
@@ -273,7 +275,7 @@
273275
#:def OMP_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraOmpArgs=None)
274276
#:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n')
275277
#:set create_val = OMP_CREATE_STR(create)
276-
#:set attach_val = OMP_MAP_STR('to', attach)
278+
#:set attach_val = OMP_MAP_STR('always,to', attach)
277279
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
278280
#:set omp_clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n')
279281
#:set omp_directive = '!$omp target enter data ' + omp_clause_val + extraOmpArgs_val.strip('\n')
@@ -283,7 +285,7 @@
283285
#:def OMP_EXIT_DATA(copyout=None, delete=None, detach=None, extraOmpArgs=None)
284286
#:set copyout_val = OMP_COPYOUT_STR(copyout)
285287
#:set delete_val = OMP_DELETE_STR(delete)
286-
#:set detach_val = OMP_MAP_STR('from', detach)
288+
#:set detach_val = OMP_MAP_STR('always,from', detach)
287289
#:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs)
288290
#:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n')
289291
#:set omp_directive = '!$omp target exit data ' + clause_val + extraOmpArgs_val.strip('\n')
@@ -339,12 +341,24 @@
339341
#:endif
340342
#:enddef
341343

344+
#:def DEF_AMD(code)
345+
#:if MFC_COMPILER == AMD_COMPILER_ID
346+
$:code
347+
#:endif
348+
#:enddef
349+
342350
#:def UNDEF_CCE(code)
343351
#:if MFC_COMPILER != CCE_COMPILER_ID
344352
$:code
345353
#:endif
346354
#:enddef
347355

356+
#:def DEF_CCE(code)
357+
#:if MFC_COMPILER == CCE_COMPILER_ID
358+
$:code
359+
#:endif
360+
#:enddef
361+
348362
#:def UNDEF_NVIDIA(code)
349363
#:if MFC_COMPILER != NVIDIA_COMPILER_ID and MFC_COMPILER != PGI_COMPILER_ID
350364
$:code

src/common/m_chemistry.fpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ module m_chemistry
1515
use m_global_parameters
1616

1717
implicit none
18+
19+
#:block DEF_AMD
20+
real(dp) :: molecular_weights_nonparameter(10) = &
21+
(/ 2.016d0, 1.008d0, 15.999d0, 31.998d0, 17.007d0, 18.015d0, 33.006d0, &
22+
34.014d0, 39.95d0, 28.014d0 /)
23+
$:GPU_DECLARE(create='[molecular_weights_nonparameter]')
24+
#:endblock DEF_AMD
1825

1926
contains
2027

@@ -99,7 +106,6 @@ contains
99106
real(wp), dimension(num_species) :: Ys
100107
real(wp), dimension(num_species) :: omega
101108

102-
#:block UNDEF_AMD
103109
#:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]')
104110
do z = bounds(3)%beg, bounds(3)%end
105111
do y = bounds(2)%beg, bounds(2)%end
@@ -117,9 +123,12 @@ contains
117123

118124
$:GPU_LOOP(parallelism='[seq]')
119125
do eqn = chemxb, chemxe
120-
126+
#:block UNDEF_AMD
121127
omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1)
122-
128+
#:endblock UNDEF_AMD
129+
#:block DEF_AMD
130+
omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1)
131+
#:endblock DEF_AMD
123132
rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m
124133

125134
end do
@@ -128,7 +137,6 @@ contains
128137
end do
129138
end do
130139
#:endcall GPU_PARALLEL_LOOP
131-
#:endblock UNDEF_AMD
132140

133141
end subroutine s_compute_chemistry_reaction_flux
134142

src/simulation/m_cbc.fpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ module m_cbc
3737
molecular_weights, get_species_specific_heats_r, &
3838
get_mole_fractions, get_species_specific_heats_r
3939

40+
#:block DEF_AMD
41+
use m_chemistry, only: molecular_weights_nonparameter
42+
#:endblock DEF_AMD
4043
implicit none
4144

4245
private; public :: s_initialize_cbc_module, s_cbc, s_finalize_cbc_module
@@ -773,7 +776,6 @@ contains
773776
end if
774777
775778
! FD2 or FD4 of RHS at j = 0
776-
#:block UNDEF_AMD
777779
#:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]')
778780
do r = is3%beg, is3%end
779781
do k = is2%beg, is2%end
@@ -1050,8 +1052,15 @@ contains
10501052
sum_Enthalpies = 0._wp
10511053
$:GPU_LOOP(parallelism='[seq]')
10521054
do i = 1, num_species
1055+
#:block UNDEF_AMD
10531056
h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T
10541057
sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i)
1058+
#:endblock UNDEF_AMD
1059+
1060+
#:block DEF_AMD
1061+
h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T
1062+
sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i)
1063+
#:endblock DEF_AMD
10551064
end do
10561065
flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) &
10571066
+ ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies)
@@ -1106,7 +1115,6 @@ contains
11061115
end do
11071116
end do
11081117
#:endcall GPU_PARALLEL_LOOP
1109-
#:endblock UNDEF_AMD
11101118
end if
11111119
#:endfor
11121120

src/simulation/m_start_up.fpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,15 @@ contains
14531453

14541454
$:GPU_UPDATE(device='[igr, igr_order]')
14551455

1456+
#:block DEF_AMD
1457+
block
1458+
use m_thermochem, only: molecular_weights
1459+
use m_chemistry, only: molecular_weights_nonparameter
1460+
molecular_weights_nonparameter(:) = molecular_weights(:)
1461+
$:GPU_UPDATE(device='[molecular_weights_nonparameter]')
1462+
end block
1463+
#:endblock
1464+
14561465
end subroutine s_initialize_gpu_vars
14571466

14581467
impure subroutine s_finalize_modules

0 commit comments

Comments
 (0)