Bubble cases run 20 times faster (fixed one of the kernels); the speedup is retained for other cases; all tests pass

Anand Radhakrishnan · Anand Radhakrishnan · commit 3e9e29b44f50 · 2022-11-30T06:36:01.000-05:00
diff --git a/src/simulation/m_bubbles.f90 b/src/simulation/m_bubbles.f90
@@ -97,15 +97,15 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
         real(kind(0d0)) :: n_tait, B_tait
 
         real(kind(0d0)), dimension(nb) :: Rtmp, Vtmp
-        real(kind(0d0)) :: myR, myV, alf, myP, myRho, R2Vav
+        real(kind(0d0)) :: myR, myV, alf, myP, myRho, R2Vav, R3
         real(kind(0d0)), dimension(num_fluids) :: myalpha, myalpha_rho
+        real(kind(0d0)) :: start, finish
 
         real(kind(0d0)), dimension(2) :: Re !< Reynolds number
 
         integer :: i, j, k, l, q, ii !< Loop variables
         integer :: ndirs  !< Number of coordinate directions
 
-
         !$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp)
         do l = 0, p
             do k = 0, n
@@ -123,8 +123,6 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
             end do
         end do
 
-
-
         !$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp)
         do l = 0, p
             do k = 0, n
@@ -136,10 +134,21 @@ subroutine s_compute_bubble_source(bub_adv_src, bub_r_src, bub_v_src, bub_p_src,
                         Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l)
                     end do
 
-                    call s_comp_n_from_prim(q_prim_vf(alf_idx)%sf(j, k, l), &
-                                            Rtmp, nbub(j, k, l))
+                    R3 = 0d0
+
+                    !$acc loop seq
+                    do q = 1, nb
+                        R3 = R3 + weight(q)*Rtmp(q)**3.d0
+                    end do
+
+                    nbub(j, k, l) = (3.d0/(4.d0*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3
+
+                    R2Vav = 0d0
 
-                    call s_quad((Rtmp**2.d0)*Vtmp, R2Vav)
+                    !$acc loop seq
+                    do q = 1, nb
+                        R2Vav = R2Vav + weight(q)*Rtmp(q)**2.d0*Vtmp(q)
+                    end do
 
                     bub_adv_src(j, k, l) = 4.d0*pi*nbub(j, k, l)*R2Vav
 
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
@@ -491,9 +491,10 @@ contains
             ! Determining the degree of the WENO polynomials
             weno_polyn = (weno_order - 1)/2
 !$acc update device(weno_polyn)
+!$acc update device(nb)
         #:endif
 
-!$acc update device(nb)
+
 
         ! Initializing the number of fluids for which viscous effects will
         ! be non-negligible, the number of distinctive material interfaces
diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp
@@ -182,7 +182,7 @@ contains
 
         coeffs = 0d0
 
-        do i1 = 0, 2; do i2 = 0, 2
+        do i2 = 0, 2; do i1 = 0, 2
                 if ((i1 + i2) <= 2) then
                     if (bubble_model == 3) then
                         ! RPE
@@ -223,7 +223,8 @@ contains
         real(kind(0d0)), dimension(nb) :: Rvec
         real(kind(0d0)), dimension(nnode, nb) :: wght, abscX, abscY
         real(kind(0d0)), dimension(nterms, 0:2, 0:2) :: mom3d_terms, coeff
-        real(kind(0d0)) :: pres, rho, nbub, c, alf
+        real(kind(0d0)) :: pres, rho, nbub, c, alf, R3, momsum
+        real(kind(0d0)) :: start, finish
         real(kind(0d0)) :: n_tait, B_tait
 
         integer :: j, k, l, q, r, s !< Loop variables
@@ -234,7 +235,8 @@ contains
 
         !$acc update device(is1, is2, is3)
 
-!$acc parallel loop collapse(3) gang vector default(present) private(moms, Rvec, wght, abscX, abscY, mom3d_terms, coeff)
+
+!$acc parallel loop collapse(3) gang vector default(present) private(moms, wght, abscX, abscY, coeff)
         do id3 = is3%beg, is3%end
             do id2 = is2%beg, is2%end
                 do id1 = is1%beg, is1%end
@@ -261,12 +263,14 @@ contains
 
                     if (alf > small_alf) then
 
+                        R3 = 0d0
+
                         !$acc loop seq
                         do q = 1, nb
-                            Rvec(q) = q_prim_vf(bubrs(q))%sf(id1, id2, id3)
+                            R3 = R3 + weight(q)*q_prim_vf(bubrs(q))%sf(id1, id2, id3)**3d0
                         end do
 
-                        call s_comp_n_from_prim(alf, Rvec, nbub)
+                        nbub = (3.d0/(4.d0*pi))*alf/R3
 
                         !$acc loop seq
                         do q = 1, nb
@@ -275,48 +279,29 @@ contains
                                 moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3)
                             end do
 
-                            ! IF(id1==0) THEN
-                            !     PRINT*, 'pres: ', pres
-                            !     PRINT*, 'nb : ', nbub
-                            !     PRINT*, 'alf: ', alf
-                            !     DO s = 1,nmom
-                            !         PRINT*, 'mom: ', moms(s)
-                            !     END DO
-                            ! END IF
+                           
 
                             call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q))
 
-                            !$acc loop seq
-                            do j = 1, nterms
-                                !$acc loop seq
-                                do i2 = 0, 2
-                                    !$acc loop seq
-                                    do i1 = 0, 2
-                                        if ((i1 + i2) <= 2) then
-
-                                            mom3d_terms(j, i1, i2) = coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) &
-                                                            *f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q))
-                                        end if
-                                    end do
-                                end do
-                            end do
 
                             !$acc loop seq
-                            do i1 = 0, 2
+                            do i2 = 0, 2
                                 !$acc loop seq
-                                do i2 = 0, 2
+                                do i1 = 0, 2
                                     if ((i1 + i2) <= 2) then
-                                        moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*sum(mom3d_terms(:, i1, i2))
-                                        ! IF (moms3d(i1,i2,q)%sf(id1,id2,id3) .NE. moms3d(i1,i2,q)%sf(id1,id2,id3)) THEN
-                                        !     PRINT*, 'nan in mom3d', i1,i2,id1
-                                        !     PRINT*, 'nbu: ', nbub
-                                        !     PRINT*, 'alf: ', alf
-                                        !     PRINT*, 'moms: ', moms(:)
-                                        !     CALL s_mpi_abort()
-                                        ! END IF
+                                        momsum = 0d0
+                                        !$acc loop seq
+                                        do j = 1, nterms           
+                                            momsum = momsum  + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) &
+                                                            *f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q))
+                                        end do
+                                        moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub * momsum
+
                                     end if
                                 end do
                             end do
+
+                            
                         end do
 
                         momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3d0, 0d0, 0d0)
@@ -329,19 +314,7 @@ contains
                             momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3d0*(1d0 - gam), 0d0, 3d0*gam)
                         end if
 
-                    !!$acc loop seq
-                        !do i1 = 1, 4
-                        ! if (momsp(i1)%sf(id1, id2, id3) /= momsp(i1)%sf(id1, id2, id3)) then
-                        !     print *, 'NaN in sp moment', i1, 'location', id1, id2, id3
-                        !     print *, 'Rs', Rvec(:)
-                        !     print *, 'alpha', alf
-                        !     print *, 'nbub', nbub
-                        !     print *, 'abscX', abscX(:, :)
-                        !     print *, 'abscY', abscY(:, :)
-                        !     print *, 'wght', wght(:, :)
-                        !    call s_mpi_abort()
-                        !end if
-                        !end do
+                    
                     else
                         !$acc loop seq
                         do q = 1, nb
@@ -365,6 +338,7 @@ contains
             end do
         end do
 
+
     end subroutine s_mom_inv
 
     subroutine s_chyqmom(momin, wght, abscX, abscY)
diff --git a/src/simulation/m_rhs.f90 b/src/simulation/m_rhs.f90
@@ -817,7 +817,6 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
 
         end if
 
-
         call nvtxStartRange("RHS-CONVERT")
         call s_convert_conservative_to_primitive_variables( &
             q_cons_qp%vf, &
@@ -826,6 +825,7 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
             ix, iy, iz)
         call nvtxEndRange
 
+
         
         if (t_step == t_step_stop) return
         ! ==================================================================
@@ -928,6 +928,7 @@ subroutine s_compute_rhs(q_cons_vf, q_prim_vf, rhs_vf, t_step) ! -------
             call nvtxStartRange("RHS-Riemann")
 
             ! Computing Riemann Solver Flux and Source Flux =================
+
             call s_riemann_solver(qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, &
                                   dqR_prim_dx_n(id)%vf, &
                                   dqR_prim_dy_n(id)%vf, &
diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp
@@ -1075,6 +1075,7 @@ contains
         real(kind(0d0)) :: blkmod1, blkmod2
         real(kind(0d0)) :: rho_Star, E_Star, p_Star, p_K_Star
         real(kind(0d0)) :: pres_SL, pres_SR, Ms_L, Ms_R
+        real(kind(0d0)) :: start, finish
         integer :: i, j, k, l, q !< Generic loop iterators
         integer :: idx1, idxi
 
@@ -1816,8 +1817,9 @@ contains
                             end do
                         end do
                     end do
+                
                 elseif (model_eqns == 2 .and. bubbles) then
-                    !$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, &
+                    !$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, & 
                     !$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms)
                     do l = is3%beg, is3%end
                         do k = is2%beg, is2%end
@@ -1918,7 +1920,7 @@ contains
                                     end do
 
                                     nbub_L = (3.d0/(4.d0*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom
-                                   nbub_R = (3.d0/(4.d0*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom
+                                    nbub_R = (3.d0/(4.d0*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom
 
 !$acc loop seq
                                     do i = 1, nb
@@ -1968,17 +1970,17 @@ contains
                                         end do
                                     end if
 
-                                if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then
+                                    if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then
                                         ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L
                                     else
-                                    ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - &
+                                        ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - &
                                                                                                            rho_L*R3V2Lbar/R3Lbar)
                                     end if
 
-                            if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then
+                                     if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then
                                         ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R
                                     else
-                                ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - &
+                                        ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - &
                                                                                                               rho_R*R3V2Rbar/R3Rbar)
                                     end if
 
@@ -2035,7 +2037,7 @@ contains
                                                pi_infs(1))/gammas(1)
                                     blkmod2 = ((gammas(2) + 1d0)*pres_L + &
                                                pi_infs(2))/gammas(2)
-   c_L = 1d0/(rho_L*(qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 1)/blkmod1 + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 2)/blkmod2))
+                                    c_L = 1d0/(rho_L*(qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 1)/blkmod1 + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + 2)/blkmod2))
 
                                     blkmod1 = ((gammas(1) + 1d0)*pres_R + &
                                                pi_infs(1))/gammas(1)