Skip to content

Commit 3e9e29b

Browse files
author
Anand Radhakrishnan
committed
Bubbles cases run 20 times faster (fixed one of the kernels) + Retains speedup for other cases + All tests pass
1 parent 9bacfb3 commit 3e9e29b

File tree

5 files changed

+53
-66
lines changed

5 files changed

+53
-66
lines changed

src/simulation/m_bubbles.f90

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,15 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
9797
real(kind(0d0)) :: n_tait, B_tait
9898

9999
real(kind(0d0)), dimension(nb) :: Rtmp, Vtmp
100-
real(kind(0d0)) :: myR, myV, alf, myP, myRho, R2Vav
100+
real(kind(0d0)) :: myR, myV, alf, myP, myRho, R2Vav, R3
101101
real(kind(0d0)), dimension(num_fluids) :: myalpha, myalpha_rho
102+
real(kind(0d0)) :: start, finish
102103

103104
real(kind(0d0)), dimension(2) :: Re !< Reynolds number
104105

105106
integer :: i, j, k, l, q, ii !< Loop variables
106107
integer :: ndirs !< Number of coordinate directions
107108

108-
109109
!$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp)
110110
do l = 0, p
111111
do k = 0, n
@@ -123,8 +123,6 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
123123
end do
124124
end do
125125

126-
127-
128126
!$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp)
129127
do l = 0, p
130128
do k = 0, n
@@ -136,10 +134,21 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
136134
Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l)
137135
end do
138136

139-
call s_comp_n_from_prim(q_prim_vf(alf_idx)%sf(j, k, l), &
140-
Rtmp, nbub(j, k, l))
137+
R3 = 0d0
138+
139+
!$acc loop seq
140+
do q = 1, nb
141+
R3 = R3 + weight(q)*Rtmp(q)**3.d0
142+
end do
143+
144+
nbub(j, k, l) = (3.d0/(4.d0*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3
145+
146+
R2Vav = 0d0
141147

142-
call s_quad((Rtmp**2.d0)*Vtmp, R2Vav)
148+
!$acc loop seq
149+
do q = 1, nb
150+
R2Vav = R2Vav + weight(q)*Rtmp(q)**2.d0*Vtmp(q)
151+
end do
143152

144153
bub_adv_src(j, k, l) = 4.d0*pi*nbub(j, k, l)*R2Vav
145154

src/simulation/m_global_parameters.fpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,9 +491,10 @@ contains
491491
! Determining the degree of the WENO polynomials
492492
weno_polyn = (weno_order - 1)/2
493493
!$acc update device(weno_polyn)
494+
!$acc update device(nb)
494495
#:endif
495496

496-
!$acc update device(nb)
497+
497498

498499
! Initializing the number of fluids for which viscous effects will
499500
! be non-negligible, the number of distinctive material interfaces

src/simulation/m_qbmm.fpp

Lines changed: 24 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ contains
182182

183183
coeffs = 0d0
184184

185-
do i1 = 0, 2; do i2 = 0, 2
185+
do i2 = 0, 2; do i1 = 0, 2
186186
if ((i1 + i2) <= 2) then
187187
if (bubble_model == 3) then
188188
! RPE
@@ -223,7 +223,8 @@ contains
223223
real(kind(0d0)), dimension(nb) :: Rvec
224224
real(kind(0d0)), dimension(nnode, nb) :: wght, abscX, abscY
225225
real(kind(0d0)), dimension(nterms, 0:2, 0:2) :: mom3d_terms, coeff
226-
real(kind(0d0)) :: pres, rho, nbub, c, alf
226+
real(kind(0d0)) :: pres, rho, nbub, c, alf, R3, momsum
227+
real(kind(0d0)) :: start, finish
227228
real(kind(0d0)) :: n_tait, B_tait
228229

229230
integer :: j, k, l, q, r, s !< Loop variables
@@ -234,7 +235,8 @@ contains
234235

235236
!$acc update device(is1, is2, is3)
236237

237-
!$acc parallel loop collapse(3) gang vector default(present) private(moms, Rvec, wght, abscX, abscY, mom3d_terms, coeff)
238+
239+
!$acc parallel loop collapse(3) gang vector default(present) private(moms, wght, abscX, abscY, coeff)
238240
do id3 = is3%beg, is3%end
239241
do id2 = is2%beg, is2%end
240242
do id1 = is1%beg, is1%end
@@ -261,12 +263,14 @@ contains
261263

262264
if (alf > small_alf) then
263265

266+
R3 = 0d0
267+
264268
!$acc loop seq
265269
do q = 1, nb
266-
Rvec(q) = q_prim_vf(bubrs(q))%sf(id1, id2, id3)
270+
R3 = R3 + weight(q)*q_prim_vf(bubrs(q))%sf(id1, id2, id3)**3d0
267271
end do
268272

269-
call s_comp_n_from_prim(alf, Rvec, nbub)
273+
nbub = (3.d0/(4.d0*pi))*alf/R3
270274

271275
!$acc loop seq
272276
do q = 1, nb
@@ -275,48 +279,29 @@ contains
275279
moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3)
276280
end do
277281

278-
! IF(id1==0) THEN
279-
! PRINT*, 'pres: ', pres
280-
! PRINT*, 'nb : ', nbub
281-
! PRINT*, 'alf: ', alf
282-
! DO s = 1,nmom
283-
! PRINT*, 'mom: ', moms(s)
284-
! END DO
285-
! END IF
282+
286283

287284
call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q))
288285

289-
!$acc loop seq
290-
do j = 1, nterms
291-
!$acc loop seq
292-
do i2 = 0, 2
293-
!$acc loop seq
294-
do i1 = 0, 2
295-
if ((i1 + i2) <= 2) then
296-
297-
mom3d_terms(j, i1, i2) = coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) &
298-
*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q))
299-
end if
300-
end do
301-
end do
302-
end do
303286

304287
!$acc loop seq
305-
do i1 = 0, 2
288+
do i2 = 0, 2
306289
!$acc loop seq
307-
do i2 = 0, 2
290+
do i1 = 0, 2
308291
if ((i1 + i2) <= 2) then
309-
moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*sum(mom3d_terms(:, i1, i2))
310-
! IF (moms3d(i1,i2,q)%sf(id1,id2,id3) .NE. moms3d(i1,i2,q)%sf(id1,id2,id3)) THEN
311-
! PRINT*, 'nan in mom3d', i1,i2,id1
312-
! PRINT*, 'nbu: ', nbub
313-
! PRINT*, 'alf: ', alf
314-
! PRINT*, 'moms: ', moms(:)
315-
! CALL s_mpi_abort()
316-
! END IF
292+
momsum = 0d0
293+
!$acc loop seq
294+
do j = 1, nterms
295+
momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) &
296+
*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q))
297+
end do
298+
moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub * momsum
299+
317300
end if
318301
end do
319302
end do
303+
304+
320305
end do
321306

322307
momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3d0, 0d0, 0d0)
@@ -329,19 +314,7 @@ contains
329314
momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3d0*(1d0 - gam), 0d0, 3d0*gam)
330315
end if
331316
332-
!!$acc loop seq
333-
!do i1 = 1, 4
334-
! if (momsp(i1)%sf(id1, id2, id3) /= momsp(i1)%sf(id1, id2, id3)) then
335-
! print *, 'NaN in sp moment', i1, 'location', id1, id2, id3
336-
! print *, 'Rs', Rvec(:)
337-
! print *, 'alpha', alf
338-
! print *, 'nbub', nbub
339-
! print *, 'abscX', abscX(:, :)
340-
! print *, 'abscY', abscY(:, :)
341-
! print *, 'wght', wght(:, :)
342-
! call s_mpi_abort()
343-
!end if
344-
!end do
317+
345318
else
346319
!$acc loop seq
347320
do q = 1, nb
@@ -365,6 +338,7 @@ contains
365338
end do
366339
end do
367340
341+
368342
end subroutine s_mom_inv
369343
370344
subroutine s_chyqmom(momin, wght, abscX, abscY)

src/simulation/m_rhs.f90

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,6 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
817817

818818
end if
819819

820-
821820
call nvtxStartRange("RHS-CONVERT")
822821
call s_convert_conservative_to_primitive_variables( &
823822
q_cons_qp%vf, &
@@ -826,6 +825,7 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
826825
ix, iy, iz)
827826
call nvtxEndRange
828827

828+
829829

830830
if (t_step == t_step_stop) return
831831
! ==================================================================
@@ -928,6 +928,7 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
928928
call nvtxStartRange("RHS-Riemann")
929929

930930
! Computing Riemann Solver Flux and Source Flux =================
931+
931932
call s_riemann_solver(qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, &
932933
dqR_prim_dx_n(id)%vf, &
933934
dqR_prim_dy_n(id)%vf, &

src/simulation/m_riemann_solvers.fpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,7 @@ contains
10751075
real(kind(0d0)) :: blkmod1, blkmod2
10761076
real(kind(0d0)) :: rho_Star, E_Star, p_Star, p_K_Star
10771077
real(kind(0d0)) :: pres_SL, pres_SR, Ms_L, Ms_R
1078+
real(kind(0d0)) :: start, finish
10781079
integer :: i, j, k, l, q !< Generic loop iterators
10791080
integer :: idx1, idxi
10801081
@@ -1816,8 +1817,9 @@ contains
18161817
end do
18171818
end do
18181819
end do
1820+
18191821
elseif (model_eqns == 2 .and. bubbles) then
1820-
!$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, &
1822+
!$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, &
18211823
!$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms)
18221824
do l = is3%beg, is3%end
18231825
do k = is2%beg, is2%end
@@ -1918,7 +1920,7 @@ contains
19181920
end do
19191921
19201922
nbub_L = (3.d0/(4.d0*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom
1921-
nbub_R = (3.d0/(4.d0*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom
1923+
nbub_R = (3.d0/(4.d0*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom
19221924
19231925
!$acc loop seq
19241926
do i = 1, nb
@@ -1968,17 +1970,17 @@ contains
19681970
end do
19691971
end if
19701972
1971-
if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then
1973+
if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then
19721974
ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L
19731975
else
1974-
ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - &
1976+
ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - &
19751977
rho_L*R3V2Lbar/R3Lbar)
19761978
end if
19771979
1978-
if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then
1980+
if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then
19791981
ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R
19801982
else
1981-
ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - &
1983+
ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - &
19821984
rho_R*R3V2Rbar/R3Rbar)
19831985
end if
19841986
@@ -2035,7 +2037,7 @@ contains
20352037
pi_infs(1))/gammas(1)
20362038
blkmod2 = ((gammas(2) + 1d0)*pres_L + &
20372039
pi_infs(2))/gammas(2)
2038-
c_L = 1d0/(rho_L*(qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 1)/blkmod1 + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 2)/blkmod2))
2040+
c_L = 1d0/(rho_L*(qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 1)/blkmod1 + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 2)/blkmod2))
20392041
20402042
blkmod1 = ((gammas(1) + 1d0)*pres_R + &
20412043
pi_infs(1))/gammas(1)

0 commit comments

Comments
 (0)