From d8730b1d40133028f29d7378686df830cee1c361 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 14:19:38 -0400 Subject: [PATCH 01/75] Created parallel loop macro --- src/common/include/macros.fpp | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index cdaa48ff58..b738676c9b 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -102,3 +102,52 @@ //${message or '"No error description."'}$) end if #:enddef + +#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None) + #:if collapse is not None + #:assert isinstance(collapse, int) + #:assert collapse > 0 + #:set collapse_val = 'collapse(' + str(collapse) + ') ' + #:else + #:set collapse_val = "" + #:endif + + #:if private is not None + #:assert isinstance(private, list) + #:assert len(private) != 0 + #:assert all(type(element) == str for element in private) + #:set private_val = 'private(' + ', '.join(private) + ') ' + #:else + #:set private_val = "" + #:endif + + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == "present" or default == "none") + #:set default_val = 'default(' + default + ') ' + #:else + #:set default_val = "" + #:endif + + #:if parallelism is not None + #:assert isinstance(parallelism, list) + #:assert len(parallelism) != 0 + #:assert all(type(element) == str for element in parallelism) + #:set parallelism_val = " ".join(parallelism) + " " + #:else + #:set parallelism_val = "" + #:endif + + #:if firstprivate is not None + #:assert isinstance(firstprivate, list) + #:assert len(firstprivate) != 0 + #:assert all(type(element) == str for element in firstprivate) + #:set firstprivate_val = 'firstprivate(' + ', '.join(firstprivate) + ') ' + #:else + #:set firstprivate_val = "" + #:endif + + #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + !$acc parallel loop ${clause_val}$ + +#:enddef From 8eec3d65f25411354a5dd6add634bbcb0b071b41 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 16:52:40 -0400 Subject: [PATCH 02/75] Added more args to parallel loop macro and replaced some directives --- src/common/include/macros.fpp | 47 +++++++++++++++++++++++++++++-- src/common/m_boundary_common.fpp | 24 ++++++++-------- src/common/m_chemistry.fpp | 3 +- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_cbc.fpp | 7 ++++- src/simulation/m_hypoelastic.fpp | 2 +- src/simulation/m_rhs.fpp | 4 +-- src/simulation/m_viscous.fpp | 18 ++++++------ 9 files changed, 78 insertions(+), 31 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index b738676c9b..1e692f6220 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -103,7 +103,7 @@ end if #:enddef -#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None) +#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None) #:if collapse is not None #:assert isinstance(collapse, int) #:assert collapse > 0 @@ -133,6 +133,7 @@ #:assert isinstance(parallelism, list) #:assert len(parallelism) != 0 #:assert all(type(element) == str for element in parallelism) + #:assert all((element == "gang" or element == "worker" or element == "vector") for element in parallelism) #:set parallelism_val = " ".join(parallelism) + " " #:else #:set parallelism_val = "" @@ -147,7 +148,49 @@ #:set firstprivate_val = "" #:endif - #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + #:if copy is not None + #:assert isinstance(copy, list) + #:assert len(copy) != 0 + #:assert all(type(element) == str for element in copy) + #:set copy_val = 'copy(' + ', '.join(copy) + ') ' + #:else + #:set copy_val = "" + #:endif + + #:if copyin is not None + #:assert isinstance(copyin, list) + #:assert len(copyin) != 0 + #:assert all(type(element) == str for element in copyin) + #:assert isinstance(copyinReadOnly, bool) + #:if copyinReadOnly == True + #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' + #:else + #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' + #:endif + #:else + #:set copyin_val = "" + #:endif + + #:if copyout is not None + #:assert isinstance(copyout, list) + #:assert len(copyout) != 0 + #:assert all(type(element) == str for element in copyout) + #:set copyout_val = 'copyout(' + ', '.join(copyout) + ') ' + #:else + #:set copyout_val = "" + #:endif + + #:if create is not None + #:assert isinstance(create, list) + #:assert len(create) != 0 + #:assert all(type(element) == str for element in create) + #:set create_val = 'create(' + ', '.join(create) + ') ' + #:else + #:set create_val = "" + #:endif + + + #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + copy_val + copyin_val + copyout_val + create_val !$acc parallel loop ${clause_val}$ #:enddef diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index ea72026dfc..1432e53a7c 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -83,7 +83,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) @@ -107,7 +107,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) @@ -135,7 +135,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) @@ -161,7 +161,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) @@ -189,7 +189,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) @@ -213,7 +213,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) @@ -1167,7 +1167,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1185,7 +1185,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1206,7 +1206,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1224,7 +1224,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1245,7 +1245,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1263,7 +1263,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) else - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index d61c42b1af..ad3e3c7187 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,8 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(Ys, omega) + $:parallel_loop(collapse=3, private=["Ys", "omega"]) do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 0205e4043b..3a02ae1b23 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -159,7 +159,7 @@ contains sim_time = t_step*dt - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 013d91988e..768a886fd8 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -614,7 +614,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage) + $:parallel_loop(collapse=2, private=["k"], copyin=["stage"]) do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 1c7f1a4506..b0b86cc388 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -772,7 +772,12 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - !$acc parallel loop collapse(2) gang vector default(present) private(alpha_rho, vel, adv, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt,L, lambda,Ys,dYs_dt,dYs_ds,h_k,Cp_i,Gamma_i,Xs) + $:parallel_loop(collapse=2, private=["alpha_rho", "vel", "adv", & + "mf", "dvel_ds", "dadv_ds", "Re_cbc", & + "dalpha_rho_ds","dvel_dt", "dadv_dt", & + "dalpha_rho_dt", "L", "lambda", "Ys", & + "dYs_dt", "dYs_ds", "h_k", "Cp_i", & + "Gamma_i", "Xs"]) do r = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 56f36e7ca3..5e082b4450 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -405,7 +405,7 @@ contains end do elseif (p == 0) then q = 0 - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, n do k = 0, m ! Maximum principal stress diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index deb869c11b..f534f2ca65 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1496,7 +1496,7 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do j = 0, m !$acc loop seq @@ -1564,7 +1564,7 @@ contains end do if (viscous) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = 0, p do j = 0, m !$acc loop seq diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 615448e0d3..e790651ad9 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1354,7 +1354,7 @@ contains end do end if - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(idwbuff(1)%beg, k, l) = & @@ -1366,7 +1366,7 @@ contains end do end do if (n > 0) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, idwbuff(2)%beg, l) = & @@ -1378,7 +1378,7 @@ contains end do end do if (p > 0) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, idwbuff(3)%beg) = & @@ -1393,7 +1393,7 @@ contains end if if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & @@ -1402,7 +1402,7 @@ contains end do end if if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & @@ -1412,7 +1412,7 @@ contains end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & @@ -1421,7 +1421,7 @@ contains end do end if if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & @@ -1431,7 +1431,7 @@ contains end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, 0) = & @@ -1441,7 +1441,7 @@ contains end do end if if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then - !$acc parallel loop collapse(2) gang vector default(present) + $:parallel_loop(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, p) = & From 4056f84d3846e6dc6a486714b73745036d7bc160 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 17:34:06 -0400 Subject: [PATCH 03/75] Replaced more directives and added reduction option --- src/common/include/macros.fpp | 17 ++++++++++++++--- src/common/m_phase_change.fpp | 5 ++++- src/common/m_variables_conversion.fpp | 10 ++++++---- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_body_forces.fpp | 8 ++++---- src/simulation/m_bubbles_EE.fpp | 10 +++++----- 6 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 1e692f6220..75980a54f8 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -103,7 +103,7 @@ end if #:enddef -#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None) +#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, reduction=None, reductionOp=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None) #:if collapse is not None #:assert isinstance(collapse, int) #:assert collapse > 0 @@ -148,6 +148,18 @@ #:set firstprivate_val = "" #:endif + #:if reduction is not None and reductionOp is not None + #:assert isinstance(reduction, list) + #:assert len(reduction) != 0 + #:assert all(type(element) == str for element in reduction) + #:assert isintance(reductionOp, str) + #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' + #:elif reduction is not None or reductionOp is not None + #:stop "Cannot set the reduction list or reduction operation without setting the other" + #:else + #:set reduction_val = "" + #:endif + #:if copy is not None #:assert isinstance(copy, list) #:assert len(copy) != 0 @@ -189,8 +201,7 @@ #:set create_val = "" #:endif - - #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + copy_val + copyin_val + copyout_val + create_val + #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + reduction_val + copy_val + copyin_val + copyout_val + create_val !$acc parallel loop ${clause_val}$ #:enddef diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 52565515f1..28439680a3 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -97,7 +97,10 @@ contains !$acc declare create(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok) ! starting equilibrium solver - !$acc parallel loop collapse(3) gang vector default(present) private(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF) + $:parallel_loop(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & + "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & + "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & + "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) do j = 0, m do k = 0, n do l = 0, p diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 288c2fb0f3..f0c4678f8f 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -883,9 +883,10 @@ contains end if #:endif - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, & - !$acc pi_inf_K, qv_K, dyn_pres_K, rhoYks, B) + + $:parallel_loop(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & + "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & + "dyn_pres_K", "rhoYks", "B"]) do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end @@ -1480,7 +1481,8 @@ contains ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho_K, vel_K, alpha_K, Re_K, Y_K) + $:parallel_loop(collapse=3, private=["alpha_rho_K", "vel_K", & + "alpha_K", "Re_K", "Y_K"]) do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 3a02ae1b23..7b5d8a5fee 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -312,7 +312,7 @@ contains end do ! Update the rhs variables - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ef74cbcee..7813017011 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -79,7 +79,7 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -122,7 +122,7 @@ contains if (bf_x) then ! x-direction body forces - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -137,7 +137,7 @@ contains if (bf_y) then ! y-direction body forces - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -152,7 +152,7 @@ contains if (bf_z) then ! z-direction body forces - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index d2bb02cbc5..cccde36056 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,7 +76,7 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -103,7 +103,7 @@ contains if (idir == 1) then if (.not. qbmm) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -119,7 +119,7 @@ contains elseif (idir == 2) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -133,7 +133,7 @@ contains elseif (idir == 3) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -173,7 +173,7 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m From 7a81f7a1623a5cf2824d00bbcaf22364a56855b9 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 18:17:26 -0400 Subject: [PATCH 04/75] Multi-reduction support and more directives replaced --- src/common/include/macros.fpp | 26 +++++++++++++++++++++----- src/simulation/m_bubbles_EE.fpp | 7 ++++--- src/simulation/m_bubbles_EL.fpp | 19 ++++++++++--------- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 75980a54f8..ee90eee5b6 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -149,11 +149,27 @@ #:endif #:if reduction is not None and reductionOp is not None - #:assert isinstance(reduction, list) - #:assert len(reduction) != 0 - #:assert all(type(element) == str for element in reduction) - #:assert isintance(reductionOp, str) - #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' + #:if isinstance(reduction, list) and isinstance(reductionOp, list) + #:assert isinstance(reduction, list) + #:assert len(reduction) != 0 + #:assert all(type(element) == list for element in reduction) + #:assert all(len(element) != 0 for element in reduction) + #:assert all(type(element) == str for sublist in reduction for element in sublist) + #:assert isinstance(reductionOp, list) + #:assert len(reductionOp) != 0 + #:assert all(type(element) == str for element in reductionOp) + #:assert len(reduction) == len(reductionOp) + #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] + #:set reduction_val = " ". join(reduction_list) + " " + #:elif isinstance(reduction, list) and isinstance(reductionOp, str) + #:assert isinstance(reduction, list) + #:assert len(reduction) != 0 + #:assert all(type(element) == str for element in reduction) + #:assert isinstance(reductionOp, str) + #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' + #:else + #:stop "Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively" + #:endif #:elif reduction is not None or reductionOp is not None #:stop "Cannot set the reduction list or reduction operation without setting the other" #:else diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index cccde36056..7730d60f39 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -191,8 +191,9 @@ contains end do adap_dt_stop_max = 0 - !$acc parallel loop collapse(3) gang vector default(present) private(Rtmp, Vtmp, myalpha_rho, myalpha) & - !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) + $:parallel_loop(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & + reduction=["adap_dt_stop_max"], reductionOp="MAX", & + copy=["adap_dt_stop_max"]) do l = 0, p do k = 0, n do j = 0, m @@ -324,7 +325,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do q = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 768a886fd8..a408c11513 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -680,7 +680,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -695,7 +695,7 @@ contains end do !source in energy - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(2)%beg, idwbuff(2)%end do i = idwbuff(1)%beg, idwbuff(1)%end @@ -706,7 +706,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -781,7 +781,7 @@ contains mtn_s, mtn_pos, q_beta) !Store 1-beta - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1290,7 +1290,7 @@ contains if (dir == 1) then ! Gradient in x dir. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1305,7 +1305,7 @@ contains else if (dir == 2) then ! Gradient in y dir. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1319,7 +1319,7 @@ contains end do else ! Gradient in z dir. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1413,8 +1413,9 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) & - !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max) + $:parallel_loop(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & + ["lag_void_max"]], reductionOp=["+", "MAX"], & + copy=["lag_vol", "lag_void_avg", "lag_void_max"]) do k = 0, p do j = 0, n do i = 0, m From 23dfe88d01bf13a12de4b4c4bd2ea2a735a4e3ac Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 20:53:27 -0400 Subject: [PATCH 05/75] Replaced some directives --- src/simulation/m_cbc.fpp | 34 +++++++++--------- src/simulation/m_fftw.fpp | 12 +++---- src/simulation/m_hypoelastic.fpp | 24 ++++++------- src/simulation/m_qbmm.fpp | 2 +- src/simulation/m_rhs.fpp | 26 +++++++------- src/simulation/m_riemann_solvers.fpp | 54 ++++++++++++++-------------- src/simulation/m_surface_tension.fpp | 8 ++--- src/simulation/m_viscous.fpp | 46 ++++++++++++------------ 8 files changed, 103 insertions(+), 103 deletions(-) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index b0b86cc388..c8861c04ef 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -698,7 +698,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -710,7 +710,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1181,7 +1181,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1205,7 +1205,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1228,7 +1228,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1257,7 +1257,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1281,7 +1281,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1304,7 +1304,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1333,7 +1333,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1357,7 +1357,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1380,7 +1380,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1437,7 +1437,7 @@ contains end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1460,7 +1460,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1489,7 +1489,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1512,7 +1512,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1542,7 +1542,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1565,7 +1565,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 3c18a8c1fe..14c87440db 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -139,7 +139,7 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_OpenACC) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -148,7 +148,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -173,7 +173,7 @@ contains Nfq = 3 !$acc update device(Nfq) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -191,7 +191,7 @@ contains #endif !$acc end host_data - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -203,7 +203,7 @@ contains do i = 1, fourier_rings - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -233,7 +233,7 @@ contains Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) !$acc update device(Nfq) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 5e082b4450..f785cbc355 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,7 +104,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -114,7 +114,7 @@ contains end do !$acc end parallel loop - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -130,7 +130,7 @@ contains !$acc end parallel loop if (ndirs > 1) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -140,7 +140,7 @@ contains end do !$acc end parallel loop - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -161,7 +161,7 @@ contains ! 3D if (ndirs == 3) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -172,7 +172,7 @@ contains end do !$acc end parallel loop - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -196,7 +196,7 @@ contains end if end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -220,7 +220,7 @@ contains end do ! apply rhs source term to elastic stress equation - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -234,7 +234,7 @@ contains end do elseif (idir == 2) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -269,7 +269,7 @@ contains end do elseif (idir == 3) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -337,7 +337,7 @@ contains if (cyl_coord .and. idir == 2) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -419,7 +419,7 @@ contains end do end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 1c52cee2fa..e69c126e44 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -480,7 +480,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do q = 0, n do i = 0, m diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index f534f2ca65..043ece5ca7 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -646,7 +646,7 @@ contains ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -867,7 +867,7 @@ contains ! END: Dimensional Splitting Loop if (ib) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -975,7 +975,7 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1432,7 +1432,7 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1446,7 +1446,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1464,7 +1464,7 @@ contains elseif (idir == 2) then ! y-direction if (surface_tension) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1511,7 +1511,7 @@ contains end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1527,7 +1527,7 @@ contains end do else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1548,7 +1548,7 @@ contains if (cyl_coord) then if ((bc_y%beg == BC_REFLECTIVE) .or. (bc_y%beg == BC_AXIS)) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1578,7 +1578,7 @@ contains end if else - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1599,7 +1599,7 @@ contains elseif (idir == 3) then ! z-direction if (surface_tension) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1613,7 +1613,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1629,7 +1629,7 @@ contains end do if (grid_geometry == 3) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 52a4640524..1cc4651c72 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -3504,7 +3504,7 @@ contains if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3515,7 +3515,7 @@ contains end do if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3527,7 +3527,7 @@ contains end do if (n > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3539,7 +3539,7 @@ contains end do if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3559,7 +3559,7 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3571,7 +3571,7 @@ contains if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3583,7 +3583,7 @@ contains end do if (n > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3595,7 +3595,7 @@ contains end do if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3618,7 +3618,7 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3630,7 +3630,7 @@ contains if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3640,7 +3640,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3651,7 +3651,7 @@ contains end do if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3668,7 +3668,7 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3680,7 +3680,7 @@ contains if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3690,7 +3690,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3701,7 +3701,7 @@ contains end do if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3721,7 +3721,7 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3732,7 +3732,7 @@ contains end do if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3741,7 +3741,7 @@ contains end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3750,7 +3750,7 @@ contains end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3765,7 +3765,7 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3776,7 +3776,7 @@ contains end do if (viscous) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3786,7 +3786,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3796,7 +3796,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4357,7 +4357,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4410,7 +4410,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end @@ -4447,7 +4447,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 8dac00ee39..5fe98868fd 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -241,7 +241,7 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -251,7 +251,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -262,7 +262,7 @@ contains end do if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -273,7 +273,7 @@ contains end do end if - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index e790651ad9..1db8b74d09 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -77,7 +77,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -589,7 +589,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -604,7 +604,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -621,7 +621,7 @@ contains if (n > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -636,7 +636,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -651,7 +651,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -670,7 +670,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -690,7 +690,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end @@ -710,7 +710,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 @@ -732,7 +732,7 @@ contains if (p > 0) then - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -748,7 +748,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -764,7 +764,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -785,7 +785,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -806,7 +806,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -827,7 +827,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -848,7 +848,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -869,7 +869,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -889,7 +889,7 @@ contains end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -909,7 +909,7 @@ contains end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1206,7 +1206,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1234,7 +1234,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end @@ -1262,7 +1262,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1317,7 +1317,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - !$acc parallel loop collapse(3) gang vector default(present) + $:parallel_loop(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end From 76f79e069240005e6b7a86295809d80f6facc94f Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 3 Jun 2025 20:55:46 -0400 Subject: [PATCH 06/75] Replaced more directives --- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 6 ++--- src/simulation/m_cbc.fpp | 34 ++++++++++++++-------------- src/simulation/m_rhs.fpp | 6 ++--- src/simulation/m_riemann_solvers.fpp | 28 +++++++++++------------ src/simulation/m_time_steppers.fpp | 14 ++++++------ src/simulation/m_viscous.fpp | 12 +++++----- src/simulation/m_weno.fpp | 6 ++--- 8 files changed, 54 insertions(+), 54 deletions(-) diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 7813017011..0bbe1ec1b9 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -109,7 +109,7 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index a408c11513..2e9aec51bf 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -644,7 +644,7 @@ contains if (lag_params%solver_approach == 2) then if (p == 0) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -660,7 +660,7 @@ contains end do end do else - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -766,7 +766,7 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index c8861c04ef..03e3d2d91e 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -729,7 +729,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do j = 0, 1 do r = is3%beg, is3%end @@ -749,7 +749,7 @@ contains end do end do - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do j = 0, 1 do r = is3%beg, is3%end @@ -1169,7 +1169,7 @@ contains ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1192,7 +1192,7 @@ contains end do end do - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1216,7 +1216,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1245,7 +1245,7 @@ contains ! Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1268,7 +1268,7 @@ contains end do end do - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1292,7 +1292,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1321,7 +1321,7 @@ contains ! Reshaping Inputted Data in z-direction else - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1344,7 +1344,7 @@ contains end do end do - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1368,7 +1368,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1425,7 +1425,7 @@ contains ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1448,7 +1448,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1476,7 +1476,7 @@ contains ! Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1500,7 +1500,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1529,7 +1529,7 @@ contains ! Reshaping Outputted Data in z-direction else - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1553,7 +1553,7 @@ contains end do if (riemann_solver == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 043ece5ca7..e5e8ba9f39 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -592,7 +592,7 @@ contains !$acc update device(Res, Re_idx, Re_size) end if - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do id = 1, num_dims do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -632,7 +632,7 @@ contains call cpu_time(t_start) ! Association/Population of Working Variables - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -935,7 +935,7 @@ contains ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 1cc4651c72..a0c4cd50cd 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -3852,7 +3852,7 @@ contains if (viscous .or. (surface_tension)) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3866,7 +3866,7 @@ contains if (qbmm) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3882,7 +3882,7 @@ contains elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -3895,7 +3895,7 @@ contains end if if (qbmm) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3911,7 +3911,7 @@ contains else if (viscous .or. (surface_tension)) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -3924,7 +3924,7 @@ contains end if if (qbmm) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4331,7 +4331,7 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4344,7 +4344,7 @@ contains end do if (cyl_coord) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4368,7 +4368,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4383,7 +4383,7 @@ contains end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4396,7 +4396,7 @@ contains end do end do if (grid_geometry == 3) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4421,7 +4421,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb + 1, advxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4435,7 +4435,7 @@ contains end if elseif (norm_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4458,7 +4458,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 7dd75f7486..21748e5aad 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -377,7 +377,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -479,7 +479,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -551,7 +551,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -661,7 +661,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -733,7 +733,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -806,7 +806,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -1024,7 +1024,7 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 1db8b74d09..6a84cd63ac 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1019,7 +1019,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1031,7 +1031,7 @@ contains end do end do elseif (norm_dir == 3) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1043,7 +1043,7 @@ contains end do end do elseif (norm_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1118,7 +1118,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1130,7 +1130,7 @@ contains end do end do elseif (norm_dir == 3) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1142,7 +1142,7 @@ contains end do end do elseif (norm_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 39e0b281a8..e390192f6b 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1138,7 +1138,7 @@ contains !$acc update device(v_size) if (weno_dir == 1) then - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1176,7 +1176,7 @@ contains end if else #endif - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1206,7 +1206,7 @@ contains end block else #endif - !$acc parallel loop collapse(4) gang vector default(present) + $:parallel_loop(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end From aa9b5c68bee62a7b53c6e4eeb55ed9c16776cc76 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 4 Jun 2025 12:11:48 -0400 Subject: [PATCH 07/75] More directives replaced and formatter --- src/common/include/macros.fpp | 3 +-- src/common/m_phase_change.fpp | 6 ++--- src/common/m_variables_conversion.fpp | 7 +++--- src/simulation/m_bubbles_EE.fpp | 4 +-- src/simulation/m_bubbles_EL.fpp | 4 +-- src/simulation/m_cbc.fpp | 8 +++--- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_fftw.fpp | 4 +-- src/simulation/m_hyperelastic.fpp | 4 +-- src/simulation/m_mhd.fpp | 3 +-- src/simulation/m_mpi_proxy.fpp | 36 +++++++++++++-------------- src/simulation/m_qbmm.fpp | 5 +++- src/simulation/m_rhs.fpp | 35 +++++++++++++++++--------- src/simulation/m_time_steppers.fpp | 24 +++++++++--------- 14 files changed, 77 insertions(+), 68 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index ee90eee5b6..70ee3deb93 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -184,7 +184,7 @@ #:else #:set copy_val = "" #:endif - + #:if copyin is not None #:assert isinstance(copyin, list) #:assert len(copyin) != 0 @@ -219,5 +219,4 @@ #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + reduction_val + copy_val + copyin_val + copyout_val + create_val !$acc parallel loop ${clause_val}$ - #:enddef diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 28439680a3..5522484167 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -98,9 +98,9 @@ contains ! starting equilibrium solver $:parallel_loop(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & - "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & - "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & - "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) + "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & + "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & + "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) do j = 0, m do k = 0, n do l = 0, p diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index f0c4678f8f..433616bc95 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -883,10 +883,9 @@ contains end if #:endif - $:parallel_loop(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & - "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & - "dyn_pres_K", "rhoYks", "B"]) + "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & + "dyn_pres_K", "rhoYks", "B"]) do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end @@ -1482,7 +1481,7 @@ contains ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION $:parallel_loop(collapse=3, private=["alpha_rho_K", "vel_K", & - "alpha_K", "Re_K", "Y_K"]) + "alpha_K", "Re_K", "Y_K"]) do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 7730d60f39..13371d9406 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -192,8 +192,8 @@ contains adap_dt_stop_max = 0 $:parallel_loop(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & - reduction=["adap_dt_stop_max"], reductionOp="MAX", & - copy=["adap_dt_stop_max"]) + reduction=["adap_dt_stop_max"], reductionOp="MAX", & + copy=["adap_dt_stop_max"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 2e9aec51bf..8567ec97ae 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1414,8 +1414,8 @@ contains lag_void_avg = 0._wp lag_vol = 0._wp $:parallel_loop(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & - ["lag_void_max"]], reductionOp=["+", "MAX"], & - copy=["lag_vol", "lag_void_avg", "lag_void_max"]) + ["lag_void_max"]], reductionOp=["+", "MAX"], & + copy=["lag_vol", "lag_void_avg", "lag_void_max"]) do k = 0, p do j = 0, n do i = 0, m diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 03e3d2d91e..67cdcc13fc 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -773,11 +773,9 @@ contains ! FD2 or FD4 of RHS at j = 0 $:parallel_loop(collapse=2, private=["alpha_rho", "vel", "adv", & - "mf", "dvel_ds", "dadv_ds", "Re_cbc", & - "dalpha_rho_ds","dvel_dt", "dadv_dt", & - "dalpha_rho_dt", "L", "lambda", "Ys", & - "dYs_dt", "dYs_ds", "h_k", "Cp_i", & - "Gamma_i", "Xs"]) + "mf", "dvel_ds", "dadv_ds", "Re_cbc", "dalpha_rho_ds","dvel_dt", & + "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & + "dYs_ds", "h_k", "Cp_i", "Gamma_i", "Xs"]) do r = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index b66e13d9c2..b778a0b4a2 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -277,7 +277,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - !$acc parallel loop collapse(3) gang vector default(present) private(vel, alpha, Re) + $:parallel_loop(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 14c87440db..e3476b4471 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -212,7 +212,7 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) firstprivate(i) + $:parallel_loop(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p @@ -251,7 +251,7 @@ contains #endif !$acc end host_data - !$acc parallel loop collapse(3) gang vector default(present) firstprivate(i) + $:parallel_loop(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index be79e98232..ada0069746 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,8 +106,8 @@ contains real(wp) :: G integer :: j, k, l, i, r - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_K, alpha_rho_K, & - !$acc rho, gamma, pi_inf, qv, G, Re, tensora, tensorb) + $:parallel_loop(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & + "gamma", "pi_inf", "qv", "G", "Re", "tensora", "tensorb"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index f5730b513f..a9d89f8df8 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -76,8 +76,7 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(v, B) + $:parallel_loop(collapse=3, private=["v", "B"]) do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f931227f4d..43f993e35c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -798,7 +798,7 @@ contains if (bc_x%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -853,7 +853,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -913,7 +913,7 @@ contains #endif ! Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -929,7 +929,7 @@ contains if (bc_x%beg >= 0) then ! PBC at the end and beginning - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) ! Packing buffer to be sent to bc_x%beg do l = 0, p do k = 0, n @@ -983,7 +983,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -1041,7 +1041,7 @@ contains end if ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, n do j = m + 1, m + gp_layers @@ -1061,7 +1061,7 @@ contains if (bc_y%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1117,7 +1117,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1179,7 +1179,7 @@ contains #endif ! Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -1197,7 +1197,7 @@ contains if (bc_y%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1253,7 +1253,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1315,7 +1315,7 @@ contains #endif ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, p do k = n + 1, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1335,7 +1335,7 @@ contains if (bc_z%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1392,7 +1392,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1454,7 +1454,7 @@ contains #endif ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1473,7 +1473,7 @@ contains if (bc_z%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1528,7 +1528,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1590,7 +1590,7 @@ contains #endif ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:parallel_loop(collapse=3, private=["r"]) do l = p + 1, p + gp_layers do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index e69c126e44..1b5e99985b 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -839,7 +839,10 @@ contains !$acc update device(is1_qbmm, is2_qbmm, is3_qbmm) - !$acc parallel loop collapse(3) gang vector default(present) private(moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T) + $:parallel_loop(collapse=3, private=["moms", "msum", "wght", "abscX", & + "abscY", "wght_pb", "wght_mv", "wght_ht", "coeff", "ht", "r", "q", & + "n_tait", "B_tait", "pres", "rho", "nbub", "c", "alf", "momsum", & + "drdt", "drdt2", "chi_vw", "x_vw", "rho_mw", "k_mw", "T_bar", "grad_T"]) do id3 = is3_qbmm%beg, is3_qbmm%end do id2 = is2_qbmm%beg, is2_qbmm%end do id1 = is1_qbmm%beg, is1_qbmm%end diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index e5e8ba9f39..bdefed9491 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1231,8 +1231,9 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds", & + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", "local_flux2"]) do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) @@ -1244,8 +1245,10 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds",& + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", & + "local_flux2"]) do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) @@ -1295,8 +1298,10 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds", & + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", & + "local_flux2"]) do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) @@ -1312,8 +1317,10 @@ contains end if end do; end do; end do - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds", & + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", & + "local_flux2"]) do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) @@ -1372,8 +1379,10 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds", & + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", & + "local_flux2"]) do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) @@ -1385,8 +1394,10 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2) + $:parallel_loop(collapse=3, private=["local_inv_ds", & + "local_q_cons_val", "local_k_term_val", & + "local_term_coeff", "local_flux1", & + "local_flux2"]) do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 21748e5aad..4592d4024e 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -392,7 +392,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -409,7 +409,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -494,7 +494,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -511,7 +511,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -566,7 +566,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -584,7 +584,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -676,7 +676,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -693,7 +693,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -748,7 +748,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -766,7 +766,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -821,7 +821,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -839,7 +839,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - !$acc parallel loop collapse(5) gang vector default(present) + $:parallel_loop(collapse=5) do i = 1, nb do l = 0, p do k = 0, n From d3e5ac7d486803d4d180a01106ce1e4ad247ec20 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 4 Jun 2025 13:25:54 -0400 Subject: [PATCH 08/75] More directives replaced --- src/simulation/m_rhs.fpp | 3 +- src/simulation/m_riemann_solvers.fpp | 85 ++++++++++++++++------------ src/simulation/m_surface_tension.fpp | 15 +++-- src/simulation/m_time_steppers.fpp | 2 +- src/simulation/m_viscous.fpp | 12 ++-- 5 files changed, 68 insertions(+), 49 deletions(-) diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index bdefed9491..c628067dfe 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1695,7 +1695,8 @@ contains integer :: i, j, k, l, q, iter !< Generic loop iterators integer :: relax !< Relaxation procedure determination variable - !$acc parallel loop collapse(3) gang vector private(pres_K_init, rho_K_s, alpha_rho, alpha, Re, pres_relax) + $:parallel_loop(collapse=3, private=["pres_K_init", "rho_K_s", & + "alpha_rho", "alpha", "Re", "pres_relax"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index a0c4cd50cd..1aee1317d8 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -362,15 +362,14 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & - !$acc alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, & - !$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, & - !$acc xi_field_L, xi_field_R, & - !$acc Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, & - !$acc Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, & - !$acc c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, & - !$acc pcorr, zcoef, vel_L_tmp, vel_R_tmp) + $:parallel_loop(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & + "vel_L", "vel_R", "alpha_L", "alpha_R", "tau_e_L", "tau_e_R", & + "G_L", "G_R", "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & + "s_L", "s_R", "s_S", "Ys_L", "Ys_R", "xi_field_L", "xi_field_R", & + "Cp_iL", "Cp_iR", "Xs_L", "Xs_R", "Gamma_iL", "Gamma_iR", & + "Yi_avg", "Phi_avg", "h_iL", "h_iR", "h_avg_2", "c_fast", & + "pres_mag", "B", "Ga", "vdotB", "B2", "b4", "cm", "pcorr", & + "zcoef", "vel_L_tmp", "vel_R_tmp"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1296,13 +1295,14 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, & - !$acc s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, & - !$acc Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, & - !$acc tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, & - !$acc zcoef, vel_L_tmp, vel_R_tmp) + $:parallel_loop(collapse=3, private=["vel_L", "vel_R", & + "vel_K_Star", "Re_L", "Re_R", "rho_avg", "h_avg", & + "gamma_avg", "s_L", "s_R", "s_S", "vel_avg_rms", & + "alpha_L", "alpha_R", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & + "Gamma_iL", "Gamma_iR", "Cp_iL", "Cp_iR", "Yi_avg", & + "Phi_avg", "h_iL", "h_iR", "h_avg_2", "tau_e_L", & + "tau_e_R", "G_L", "G_R", "flux_ene_e", "xi_field_L", & + "xi_field_R", "pcorr", "zcoef", "vel_L_tmp", "vel_R_tmp"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1728,8 +1728,10 @@ contains elseif (model_eqns == 4) then !ME4 - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & - !$acc rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R) + $:parallel_loop(collapse=3, private=["alpha_rho_L", & + "alpha_rho_R", "vel_L", "vel_R", "alpha_L", "alpha_R", & + "rho_avg", "h_avg", "gamma_avg", "s_L", "s_R", "s_S", & + "vel_avg_rms", "nbub_L", "nbub_R", "ptilde_L", "ptilde_R"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1979,8 +1981,12 @@ contains !$acc end parallel loop elseif (model_eqns == 2 .and. bubbles_euler) then - !$acc parallel loop collapse(3) gang vector default(present) private(R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, & - !$acc rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp) + $:parallel_loop(collapse=3, private=["R0_L", "R0_R", "V0_L", & + "V0_R", "P0_L", "P0_R", "pbw_L", "pbw_R", "vel_L", & + "vel_R", "rho_avg", "alpha_L", "alpha_R", "h_avg", & + "gamma_avg", "s_L", "s_R", "s_S", "nbub_L", "nbub_R", & + "ptilde_L", "ptilde_R", "vel_avg_rms", "Re_L", "Re_R", & + "pcorr", "zcoef", "vel_L_tmp", "vel_R_tmp"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2446,11 +2452,15 @@ contains !$acc end parallel loop else ! 5-EQUATION MODEL WITH HLLC - !$acc parallel loop collapse(3) gang vector default(present) private(vel_L, vel_R, Re_L, Re_R, & - !$acc rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, & - !$acc vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & - !$acc tau_e_L, tau_e_R, xi_field_L, xi_field_R, & - !$acc Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2) copyin(is1,is2,is3) + $:parallel_loop(collapse=3, private=["vel_L", "vel_R", & + "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & + "alpha_L", "alpha_R", "s_L", "s_R", "s_S", & + "vel_avg_rms", "pcorr", "zcoef", "vel_L_tmp", & + "vel_R_tmp", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & + "Gamma_iL", "Gamma_iR", "Cp_iL", "Cp_iR", "tau_e_L", & + "tau_e_R", "xi_field_L", "xi_field_R", "Yi_avg", & + "Phi_avg", "h_iL", "h_iR", "h_avg_2"], & + copyin=["is1", "is2", "is3"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3068,10 +3078,12 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, & - !$acc rho, pres, E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, & - !$acc U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld) + $:parallel_loop(collapse=3, private=["alpha_rho_L", & + "alpha_rho_R", "vel", "alpha_L", "alpha_R", "rho", "pres", & + "E", "H_no_mag", "gamma", "pi_inf", "qv", "vel_rms", "B", & + "c", "c_fast", "pres_mag", "U_L", "U_R", "U_starL", & + "U_starR", "U_doubleL", "U_doubleR", "F_L", "F_R", & + "F_starL", "F_starR", "F_hlld"]) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3990,10 +4002,10 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, & - !$acc Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, & - !$acc stress_vector_shear, stress_normal_bulk, div_v_term_const) + $:parallel_loop(collapse=3, private=["idx_rp", "avg_v_int", & + "avg_dvdx_int", "avg_dvdy_int", "avg_dvdz_int", "Re_s", "Re_b", & + "vel_src_int", "r_eff", "divergence_cyl", "stress_vector_shear", & + "stress_normal_bulk", "div_v_term_const"]) do l = iz%beg, iz%end do k = iy%beg, iy%end do j = ix%beg, ix%end @@ -4158,10 +4170,9 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - !$acc parallel loop collapse(3) gang vector default(present) & - !$acc private(idx_right_phys, vel_grad_avg, & - !$acc current_tau_shear, current_tau_bulk, vel_src_at_interface, & - !$acc Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx) + $:parallel_loop(collapse=3, private=["idx_right_phys", "vel_grad_avg", & + "current_tau_shear", "current_tau_bulk", "vel_src_at_interface", & + "Re_shear", "Re_bulk", "divergence_v", "i_dim", "vel_comp_idx"]) do l_loop = isz%beg, isz%end do k_loop = isy%beg, isy%end do j_loop = isx%beg, isx%end diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 5fe98868fd..549b409ac5 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -86,8 +86,9 @@ contains integer :: j, k, l, i if (id == 1) then - !$acc parallel loop collapse(3) gang vector default(present) private(Omega, & - !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW) + $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", & + "normWR", "normW"]) do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -132,8 +133,9 @@ contains elseif (id == 2) then - !$acc parallel loop collapse(3) gang vector default(present) private(Omega, & - !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW) + $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & + "normW"]) do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -178,8 +180,9 @@ contains elseif (id == 3) then - !$acc parallel loop collapse(3) gang vector default(present) private(Omega, & - !$acc w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW) + $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & + "normW"]) do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 4592d4024e..5bafcd96ed 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -981,7 +981,7 @@ contains idwint, & gm_alpha_qp%vf) - !$acc parallel loop collapse(3) gang vector default(present) private(vel, alpha, Re) + $:parallel_loop(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 6a84cd63ac..db40bbaaf2 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -89,7 +89,8 @@ contains end do end do if (shear_stress) then ! Shear stresses - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re ) + $:parallel_loop(collapse=3, private=["alpha_visc", & + "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -196,7 +197,8 @@ contains end if if (bulk_stress) then ! Bulk stresses - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re ) + $:parallel_loop(collapse=3, private=["alpha_visc", & + "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -300,7 +302,8 @@ contains if (p == 0) return if (shear_stress) then ! Shear stresses - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re ) + $:parallel_loop(collapse=3, private=["alpha_visc", & + "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -408,7 +411,8 @@ contains end if if (bulk_stress) then ! Bulk stresses - !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re ) + $:parallel_loop(collapse=3, private=["alpha_visc", & + "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end From 2e61a77d928d7971a84e27adda7f7e94be6babf0 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 5 Jun 2025 13:28:43 -0400 Subject: [PATCH 09/75] Don't change mpi_proxy as will likely cause merge conflicts --- src/simulation/m_mpi_proxy.fpp | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 43f993e35c..f931227f4d 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -798,7 +798,7 @@ contains if (bc_x%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_x%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -853,7 +853,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_x%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -913,7 +913,7 @@ contains #endif ! Unpacking buffer received from bc_x%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -929,7 +929,7 @@ contains if (bc_x%beg >= 0) then ! PBC at the end and beginning - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) ! Packing buffer to be sent to bc_x%beg do l = 0, p do k = 0, n @@ -983,7 +983,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_x%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -1041,7 +1041,7 @@ contains end if ! Unpacking buffer received from bc_x%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = m + 1, m + gp_layers @@ -1061,7 +1061,7 @@ contains if (bc_y%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_y%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1117,7 +1117,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_y%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1179,7 +1179,7 @@ contains #endif ! Unpacking buffer received from bc_y%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -1197,7 +1197,7 @@ contains if (bc_y%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_y%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1253,7 +1253,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_y%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1315,7 +1315,7 @@ contains #endif ! Unpacking buffer received form bc_y%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = n + 1, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1335,7 +1335,7 @@ contains if (bc_z%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_z%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1392,7 +1392,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_z%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1454,7 +1454,7 @@ contains #endif ! Unpacking buffer from bc_z%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1473,7 +1473,7 @@ contains if (bc_z%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_z%beg - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1528,7 +1528,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_z%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1590,7 +1590,7 @@ contains #endif ! Unpacking buffer received from bc_z%end - $:parallel_loop(collapse=3, private=["r"]) + !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = p + 1, p + gp_layers do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers From 891b71488033513cdf46cf689beaae4d7941e7be Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 5 Jun 2025 17:53:30 -0400 Subject: [PATCH 10/75] Made macro captialized and updated macro with DRY --- src/common/include/macros.fpp | 198 ++++++++++++++++---------- src/common/m_boundary_common.fpp | 24 ++-- src/common/m_chemistry.fpp | 2 +- src/common/m_phase_change.fpp | 2 +- src/common/m_variables_conversion.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 4 +- src/simulation/m_body_forces.fpp | 10 +- src/simulation/m_bubbles_EE.fpp | 14 +- src/simulation/m_bubbles_EL.fpp | 24 ++-- src/simulation/m_cbc.fpp | 70 ++++----- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_fftw.fpp | 16 +-- src/simulation/m_hyperelastic.fpp | 2 +- src/simulation/m_hypoelastic.fpp | 26 ++-- src/simulation/m_mhd.fpp | 2 +- src/simulation/m_qbmm.fpp | 4 +- src/simulation/m_rhs.fpp | 50 +++---- src/simulation/m_riemann_solvers.fpp | 98 ++++++------- src/simulation/m_surface_tension.fpp | 14 +- src/simulation/m_time_steppers.fpp | 40 +++--- src/simulation/m_viscous.fpp | 84 +++++------ src/simulation/m_weno.fpp | 6 +- 22 files changed, 374 insertions(+), 322 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 70ee3deb93..180cf83c71 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -103,68 +103,109 @@ end if #:enddef -#:def parallel_loop(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, reduction=None, reductionOp=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None) - #:if collapse is not None - #:assert isinstance(collapse, int) - #:assert collapse > 0 - #:set collapse_val = 'collapse(' + str(collapse) + ') ' +#:def ASSERT_LIST(data, datatype) + #:assert data is not None + #:assert isinstance(data, list) + #:assert len(data) != 0 + #:assert all(isinstance(element, datatype) for element in data) +#:enddef + +#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_list) + #:assert isinstance(clause_name, str) + #:if clause_list is not None + $:ASSERT_LIST(clause_list, str) + #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' #:else - #:set collapse_val = "" + #:set clause_str = "" #:endif + $:clause_str +#:enddef - #:if private is not None - #:assert isinstance(private, list) - #:assert len(private) != 0 - #:assert all(type(element) == str for element in private) - #:set private_val = 'private(' + ', '.join(private) + ') ' +#:def GEN_COPY_STR(copy) + #:set copy_val = GEN_PARENTHESES_CLAUSE("copy", copy) + $:copy_val +#:enddef + +#:def GEN_COPYIN_STR(copyin, copyinReadOnly) + #:if copyin is not None + $:ASSERT_LIST(copyin, str) + #:assert isinstance(copyinReadOnly, bool) + #:if copyinReadOnly == True + #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' + #:else + #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' + #:endif #:else - #:set private_val = "" + #:set copyin_val = "" #:endif + $:copyin_val +#:enddef - #:if default is not None - #:assert isinstance(default, str) - #:assert (default == "present" or default == "none") - #:set default_val = 'default(' + default + ') ' +#:def GEN_COPYOUT_STR(copyout) + #:set copyout_val = GEN_PARENTHESES_CLAUSE("copyout", copyout) + $:copyout_val +#:enddef + +#:def GEN_CREATE_STR(create) + #:set create_val = GEN_PARENTHESES_CLAUSE("create", create) + $:create_val +#:enddef + +#:def GEN_EXTRA_ARGS_STR(extraArgs) + #:if extraArgs is not None + #:assert isinstance(extraArgs, str) + #:set extraArgs_val = extraArgs #:else - #:set default_val = "" + #:set extraArgs_val = "" #:endif + $:extraArgs_val +#:enddef +#:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None - #:assert isinstance(parallelism, list) - #:assert len(parallelism) != 0 - #:assert all(type(element) == str for element in parallelism) - #:assert all((element == "gang" or element == "worker" or element == "vector") for element in parallelism) + $:ASSERT_LIST(parallelism, str) + #:assert all((element == "gang" or element == "worker" or element == "vector" or element == "seq") for element in parallelism) #:set parallelism_val = " ".join(parallelism) + " " #:else #:set parallelism_val = "" #:endif + $:parallelism_val +#:enddef - #:if firstprivate is not None - #:assert isinstance(firstprivate, list) - #:assert len(firstprivate) != 0 - #:assert all(type(element) == str for element in firstprivate) - #:set firstprivate_val = 'firstprivate(' + ', '.join(firstprivate) + ') ' +#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, reduction=None, reductionOp=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, extraAccArgs=None) + #:if collapse is not None + #:assert isinstance(collapse, int) + #:assert collapse > 1 + #:set collapse_val = 'collapse(' + str(collapse) + ') ' + #:else + #:set collapse_val = "" + #:endif + + #:set private_val = GEN_PARENTHESES_CLAUSE("private", private) + + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == "present" or default == "none") + #:set default_val = 'default(' + default + ') ' #:else - #:set firstprivate_val = "" + #:set default_val = "" #:endif + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:set firstprivate_val = GEN_PARENTHESES_CLAUSE("firstprivate", firstprivate) + #:if reduction is not None and reductionOp is not None #:if isinstance(reduction, list) and isinstance(reductionOp, list) - #:assert isinstance(reduction, list) - #:assert len(reduction) != 0 - #:assert all(type(element) == list for element in reduction) + $:ASSERT_LIST(reduction, list) + $:ASSERT_LIST(reductionOp, str) #:assert all(len(element) != 0 for element in reduction) #:assert all(type(element) == str for sublist in reduction for element in sublist) - #:assert isinstance(reductionOp, list) - #:assert len(reductionOp) != 0 - #:assert all(type(element) == str for element in reductionOp) #:assert len(reduction) == len(reductionOp) #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] #:set reduction_val = " ". join(reduction_list) + " " #:elif isinstance(reduction, list) and isinstance(reductionOp, str) - #:assert isinstance(reduction, list) - #:assert len(reduction) != 0 - #:assert all(type(element) == str for element in reduction) + $:ASSERT_LIST(reduction, str) #:assert isinstance(reductionOp, str) #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' #:else @@ -176,47 +217,58 @@ #:set reduction_val = "" #:endif - #:if copy is not None - #:assert isinstance(copy, list) - #:assert len(copy) != 0 - #:assert all(type(element) == str for element in copy) - #:set copy_val = 'copy(' + ', '.join(copy) + ') ' - #:else - #:set copy_val = "" - #:endif + #:set copy_val = GEN_COPY_STR(copy) - #:if copyin is not None - #:assert isinstance(copyin, list) - #:assert len(copyin) != 0 - #:assert all(type(element) == str for element in copyin) - #:assert isinstance(copyinReadOnly, bool) - #:if copyinReadOnly == True - #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' - #:else - #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' - #:endif - #:else - #:set copyin_val = "" - #:endif + #:set copyin_val = GEN_COPYIN_STR(copyin, copyinReadOnly) - #:if copyout is not None - #:assert isinstance(copyout, list) - #:assert len(copyout) != 0 - #:assert all(type(element) == str for element in copyout) - #:set copyout_val = 'copyout(' + ', '.join(copyout) + ') ' - #:else - #:set copyout_val = "" - #:endif + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) - #:if create is not None - #:assert isinstance(create, list) - #:assert len(create) != 0 - #:assert all(type(element) == str for element in create) - #:set create_val = 'create(' + ', '.join(create) + ') ' + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + reduction_val + copy_val + copyin_val + copyout_val + create_val + #:set acc_directive = '!$acc parallel loop ' + clause_val + extraAccArgs_val + $:acc_directive +#:enddef + +#:def routine(parallelism=["seq"], nohost=False, extraAccArgs=None) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:assert isinstance(nohost, bool) + #:if nohost == True + #:set nohost_val = "nohost" #:else - #:set create_val = "" + #:set nohost_val = "" #:endif - #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + reduction_val + copy_val + copyin_val + copyout_val + create_val - !$acc parallel loop ${clause_val}$ + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = parallelism_val + nohost_val + #:set acc_directive = '!$acc routine ' + clause_val + extraAccArgs_val + $:acc_directive +#:enddef + + + +#:def declare(copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, copyinReadOnly) + + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) + + #:set present_val = GEN_PARENTHESES_CLAUSE("present", present) + + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE("deviceptr", deviceptr) + + #:set link_val = GEN_PARENTHESES_CLAUSE("link", link) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copy_val + copyin_val + copyout_val + create_val + present_val + deviceptr_val + link_val + #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val + $:acc_directive #:enddef diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 1432e53a7c..3b8732c593 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -83,7 +83,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) @@ -107,7 +107,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) @@ -135,7 +135,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) @@ -161,7 +161,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) @@ -189,7 +189,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) @@ -213,7 +213,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) @@ -1167,7 +1167,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1185,7 +1185,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1206,7 +1206,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1224,7 +1224,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1245,7 +1245,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1263,7 +1263,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) else - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index ad3e3c7187..416ecc63cc 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,7 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:parallel_loop(collapse=3, private=["Ys", "omega"]) + $:PARALLEL_LOOP(collapse=3, private=["Ys", "omega"]) do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 5522484167..48a2b748a0 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -97,7 +97,7 @@ contains !$acc declare create(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok) ! starting equilibrium solver - $:parallel_loop(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & + $:PARALLEL_LOOP(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 433616bc95..0f84d45222 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -883,7 +883,7 @@ contains end if #:endif - $:parallel_loop(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & "dyn_pres_K", "rhoYks", "B"]) do l = ibounds(3)%beg, ibounds(3)%end @@ -1480,7 +1480,7 @@ contains ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - $:parallel_loop(collapse=3, private=["alpha_rho_K", "vel_K", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_K", "vel_K", & "alpha_K", "Re_K", "Y_K"]) do l = is3b, is3e do k = is2b, is2e diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 7b5d8a5fee..a1bc61efe8 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -159,7 +159,7 @@ contains sim_time = t_step*dt - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -312,7 +312,7 @@ contains end do ! Update the rhs variables - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 0bbe1ec1b9..b0daf8dd50 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -79,7 +79,7 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -109,7 +109,7 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -122,7 +122,7 @@ contains if (bf_x) then ! x-direction body forces - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -137,7 +137,7 @@ contains if (bf_y) then ! y-direction body forces - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -152,7 +152,7 @@ contains if (bf_z) then ! z-direction body forces - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 13371d9406..ca318f8cee 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,7 +76,7 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -103,7 +103,7 @@ contains if (idir == 1) then if (.not. qbmm) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -119,7 +119,7 @@ contains elseif (idir == 2) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -133,7 +133,7 @@ contains elseif (idir == 3) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -173,7 +173,7 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -191,7 +191,7 @@ contains end do adap_dt_stop_max = 0 - $:parallel_loop(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & + $:PARALLEL_LOOP(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & reduction=["adap_dt_stop_max"], reductionOp="MAX", & copy=["adap_dt_stop_max"]) do l = 0, p @@ -325,7 +325,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 8567ec97ae..5b6c818231 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -614,7 +614,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - $:parallel_loop(collapse=2, private=["k"], copyin=["stage"]) + $:PARALLEL_LOOP(collapse=2, private=["k"], copyin=["stage"]) do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp @@ -644,7 +644,7 @@ contains if (lag_params%solver_approach == 2) then if (p == 0) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -660,7 +660,7 @@ contains end do end do else - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -680,7 +680,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -695,7 +695,7 @@ contains end do !source in energy - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(2)%beg, idwbuff(2)%end do i = idwbuff(1)%beg, idwbuff(1)%end @@ -706,7 +706,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -766,7 +766,7 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -781,7 +781,7 @@ contains mtn_s, mtn_pos, q_beta) !Store 1-beta - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1290,7 +1290,7 @@ contains if (dir == 1) then ! Gradient in x dir. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1305,7 +1305,7 @@ contains else if (dir == 2) then ! Gradient in y dir. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1319,7 +1319,7 @@ contains end do else ! Gradient in z dir. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1413,7 +1413,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:parallel_loop(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & + $:PARALLEL_LOOP(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & ["lag_void_max"]], reductionOp=["+", "MAX"], & copy=["lag_vol", "lag_void_avg", "lag_void_max"]) do k = 0, p diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 67cdcc13fc..0235f71584 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -698,7 +698,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -710,7 +710,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -729,7 +729,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do j = 0, 1 do r = is3%beg, is3%end @@ -749,7 +749,7 @@ contains end do end do - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do j = 0, 1 do r = is3%beg, is3%end @@ -772,7 +772,7 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - $:parallel_loop(collapse=2, private=["alpha_rho", "vel", "adv", & + $:PARALLEL_LOOP(collapse=2, private=["alpha_rho", "vel", "adv", & "mf", "dvel_ds", "dadv_ds", "Re_cbc", "dalpha_rho_ds","dvel_dt", & "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & "dYs_ds", "h_k", "Cp_i", "Gamma_i", "Xs"]) @@ -1167,7 +1167,7 @@ contains ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1179,7 +1179,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1190,7 +1190,7 @@ contains end do end do - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1203,7 +1203,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1214,7 +1214,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1226,7 +1226,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1243,7 +1243,7 @@ contains ! Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1255,7 +1255,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1266,7 +1266,7 @@ contains end do end do - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1279,7 +1279,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1290,7 +1290,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1302,7 +1302,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1319,7 +1319,7 @@ contains ! Reshaping Inputted Data in z-direction else - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1331,7 +1331,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1342,7 +1342,7 @@ contains end do end do - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1355,7 +1355,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1366,7 +1366,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1378,7 +1378,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1423,7 +1423,7 @@ contains ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1435,7 +1435,7 @@ contains end do end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1446,7 +1446,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1458,7 +1458,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1474,7 +1474,7 @@ contains ! Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1487,7 +1487,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1498,7 +1498,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1510,7 +1510,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1527,7 +1527,7 @@ contains ! Reshaping Outputted Data in z-direction else - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1540,7 +1540,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1551,7 +1551,7 @@ contains end do if (riemann_solver == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1563,7 +1563,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index b778a0b4a2..3d87014ea1 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -277,7 +277,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - $:parallel_loop(collapse=3, private=["vel", "alpha", "Re"]) + $:PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index e3476b4471..0a32961dce 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -139,7 +139,7 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_OpenACC) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -148,7 +148,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -173,7 +173,7 @@ contains Nfq = 3 !$acc update device(Nfq) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -191,7 +191,7 @@ contains #endif !$acc end host_data - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -203,7 +203,7 @@ contains do i = 1, fourier_rings - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -212,7 +212,7 @@ contains end do end do - $:parallel_loop(collapse=3, firstprivate=["i"]) + $:PARALLEL_LOOP(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p @@ -233,7 +233,7 @@ contains Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) !$acc update device(Nfq) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -251,7 +251,7 @@ contains #endif !$acc end host_data - $:parallel_loop(collapse=3, firstprivate=["i"]) + $:PARALLEL_LOOP(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index ada0069746..e1a5d34a27 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,7 +106,7 @@ contains real(wp) :: G integer :: j, k, l, i, r - $:parallel_loop(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & "gamma", "pi_inf", "qv", "G", "Re", "tensora", "tensorb"]) do l = 0, p do k = 0, n diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index f785cbc355..b298833cf5 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,7 +104,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -114,7 +114,7 @@ contains end do !$acc end parallel loop - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -130,7 +130,7 @@ contains !$acc end parallel loop if (ndirs > 1) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -140,7 +140,7 @@ contains end do !$acc end parallel loop - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -161,7 +161,7 @@ contains ! 3D if (ndirs == 3) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -172,7 +172,7 @@ contains end do !$acc end parallel loop - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -196,7 +196,7 @@ contains end if end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -220,7 +220,7 @@ contains end do ! apply rhs source term to elastic stress equation - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -234,7 +234,7 @@ contains end do elseif (idir == 2) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -269,7 +269,7 @@ contains end do elseif (idir == 3) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -337,7 +337,7 @@ contains if (cyl_coord .and. idir == 2) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -405,7 +405,7 @@ contains end do elseif (p == 0) then q = 0 - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, n do k = 0, m ! Maximum principal stress @@ -419,7 +419,7 @@ contains end do end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index a9d89f8df8..9b87a5a3be 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -76,7 +76,7 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - $:parallel_loop(collapse=3, private=["v", "B"]) + $:PARALLEL_LOOP(collapse=3, private=["v", "B"]) do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 1b5e99985b..924a0ed758 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -480,7 +480,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m @@ -839,7 +839,7 @@ contains !$acc update device(is1_qbmm, is2_qbmm, is3_qbmm) - $:parallel_loop(collapse=3, private=["moms", "msum", "wght", "abscX", & + $:PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & "abscY", "wght_pb", "wght_mv", "wght_ht", "coeff", "ht", "r", "q", & "n_tait", "B_tait", "pres", "rho", "nbub", "c", "alf", "momsum", & "drdt", "drdt2", "chi_vw", "x_vw", "rho_mw", "k_mw", "T_bar", "grad_T"]) diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index c628067dfe..87c362aaea 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -592,7 +592,7 @@ contains !$acc update device(Res, Re_idx, Re_size) end if - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do id = 1, num_dims do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -632,7 +632,7 @@ contains call cpu_time(t_start) ! Association/Population of Working Variables - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -646,7 +646,7 @@ contains ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -867,7 +867,7 @@ contains ! END: Dimensional Splitting Loop if (ib) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -935,7 +935,7 @@ contains ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -975,7 +975,7 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1231,7 +1231,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:parallel_loop(collapse=3, private=["local_inv_ds", & + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", "local_flux2"]) do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m @@ -1245,7 +1245,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:parallel_loop(collapse=3, private=["local_inv_ds",& + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds",& "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1298,7 +1298,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:parallel_loop(collapse=3, private=["local_inv_ds", & + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1317,7 +1317,7 @@ contains end if end do; end do; end do - $:parallel_loop(collapse=3, private=["local_inv_ds", & + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1379,7 +1379,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:parallel_loop(collapse=3, private=["local_inv_ds", & + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1394,7 +1394,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:parallel_loop(collapse=3, private=["local_inv_ds", & + $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1443,7 +1443,7 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1457,7 +1457,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1475,7 +1475,7 @@ contains elseif (idir == 2) then ! y-direction if (surface_tension) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1507,7 +1507,7 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m !$acc loop seq @@ -1522,7 +1522,7 @@ contains end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1538,7 +1538,7 @@ contains end do else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1559,7 +1559,7 @@ contains if (cyl_coord) then if ((bc_y%beg == BC_REFLECTIVE) .or. (bc_y%beg == BC_AXIS)) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1575,7 +1575,7 @@ contains end do if (viscous) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m !$acc loop seq @@ -1589,7 +1589,7 @@ contains end if else - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1610,7 +1610,7 @@ contains elseif (idir == 3) then ! z-direction if (surface_tension) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1624,7 +1624,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1640,7 +1640,7 @@ contains end do if (grid_geometry == 3) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1695,7 +1695,7 @@ contains integer :: i, j, k, l, q, iter !< Generic loop iterators integer :: relax !< Relaxation procedure determination variable - $:parallel_loop(collapse=3, private=["pres_K_init", "rho_K_s", & + $:PARALLEL_LOOP(collapse=3, private=["pres_K_init", "rho_K_s", & "alpha_rho", "alpha", "Re", "pres_relax"]) do l = 0, p do k = 0, n diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 1aee1317d8..594e0009e3 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -362,7 +362,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:parallel_loop(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & "vel_L", "vel_R", "alpha_L", "alpha_R", "tau_e_L", "tau_e_R", & "G_L", "G_R", "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & "s_L", "s_R", "s_S", "Ys_L", "Ys_R", "xi_field_L", "xi_field_R", & @@ -1295,7 +1295,7 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - $:parallel_loop(collapse=3, private=["vel_L", "vel_R", & + $:PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & "vel_K_Star", "Re_L", "Re_R", "rho_avg", "h_avg", & "gamma_avg", "s_L", "s_R", "s_S", "vel_avg_rms", & "alpha_L", "alpha_R", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & @@ -1728,7 +1728,7 @@ contains elseif (model_eqns == 4) then !ME4 - $:parallel_loop(collapse=3, private=["alpha_rho_L", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & "alpha_rho_R", "vel_L", "vel_R", "alpha_L", "alpha_R", & "rho_avg", "h_avg", "gamma_avg", "s_L", "s_R", "s_S", & "vel_avg_rms", "nbub_L", "nbub_R", "ptilde_L", "ptilde_R"]) @@ -1981,7 +1981,7 @@ contains !$acc end parallel loop elseif (model_eqns == 2 .and. bubbles_euler) then - $:parallel_loop(collapse=3, private=["R0_L", "R0_R", "V0_L", & + $:PARALLEL_LOOP(collapse=3, private=["R0_L", "R0_R", "V0_L", & "V0_R", "P0_L", "P0_R", "pbw_L", "pbw_R", "vel_L", & "vel_R", "rho_avg", "alpha_L", "alpha_R", "h_avg", & "gamma_avg", "s_L", "s_R", "s_S", "nbub_L", "nbub_R", & @@ -2452,7 +2452,7 @@ contains !$acc end parallel loop else ! 5-EQUATION MODEL WITH HLLC - $:parallel_loop(collapse=3, private=["vel_L", "vel_R", & + $:PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & "alpha_L", "alpha_R", "s_L", "s_R", "s_S", & "vel_avg_rms", "pcorr", "zcoef", "vel_L_tmp", & @@ -3078,7 +3078,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:parallel_loop(collapse=3, private=["alpha_rho_L", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & "alpha_rho_R", "vel", "alpha_L", "alpha_R", "rho", "pres", & "E", "H_no_mag", "gamma", "pi_inf", "qv", "vel_rms", "B", & "c", "c_fast", "pres_mag", "U_L", "U_R", "U_starL", & @@ -3516,7 +3516,7 @@ contains if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3527,7 +3527,7 @@ contains end do if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3539,7 +3539,7 @@ contains end do if (n > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3551,7 +3551,7 @@ contains end do if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3571,7 +3571,7 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3583,7 +3583,7 @@ contains if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3595,7 +3595,7 @@ contains end do if (n > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3607,7 +3607,7 @@ contains end do if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3630,7 +3630,7 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3642,7 +3642,7 @@ contains if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3652,7 +3652,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3663,7 +3663,7 @@ contains end do if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3680,7 +3680,7 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3692,7 +3692,7 @@ contains if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3702,7 +3702,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3713,7 +3713,7 @@ contains end do if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3733,7 +3733,7 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3744,7 +3744,7 @@ contains end do if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3753,7 +3753,7 @@ contains end do end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3762,7 +3762,7 @@ contains end do end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3777,7 +3777,7 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3788,7 +3788,7 @@ contains end do if (viscous) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3798,7 +3798,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3808,7 +3808,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3864,7 +3864,7 @@ contains if (viscous .or. (surface_tension)) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3878,7 +3878,7 @@ contains if (qbmm) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3894,7 +3894,7 @@ contains elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -3907,7 +3907,7 @@ contains end if if (qbmm) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3923,7 +3923,7 @@ contains else if (viscous .or. (surface_tension)) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -3936,7 +3936,7 @@ contains end if if (qbmm) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4002,7 +4002,7 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - $:parallel_loop(collapse=3, private=["idx_rp", "avg_v_int", & + $:PARALLEL_LOOP(collapse=3, private=["idx_rp", "avg_v_int", & "avg_dvdx_int", "avg_dvdy_int", "avg_dvdz_int", "Re_s", "Re_b", & "vel_src_int", "r_eff", "divergence_cyl", "stress_vector_shear", & "stress_normal_bulk", "div_v_term_const"]) @@ -4170,7 +4170,7 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - $:parallel_loop(collapse=3, private=["idx_right_phys", "vel_grad_avg", & + $:PARALLEL_LOOP(collapse=3, private=["idx_right_phys", "vel_grad_avg", & "current_tau_shear", "current_tau_bulk", "vel_src_at_interface", & "Re_shear", "Re_bulk", "divergence_v", "i_dim", "vel_comp_idx"]) do l_loop = isz%beg, isz%end @@ -4342,7 +4342,7 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4355,7 +4355,7 @@ contains end do if (cyl_coord) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4368,7 +4368,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4379,7 +4379,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4394,7 +4394,7 @@ contains end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4407,7 +4407,7 @@ contains end do end do if (grid_geometry == 3) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4421,7 +4421,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end @@ -4432,7 +4432,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4446,7 +4446,7 @@ contains end if elseif (norm_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4458,7 +4458,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -4469,7 +4469,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 549b409ac5..8b4d3d923f 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -86,7 +86,7 @@ contains integer :: j, k, l, i if (id == 1) then - $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", & "normWR", "normW"]) do l = isz%beg, isz%end @@ -133,7 +133,7 @@ contains elseif (id == 2) then - $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & "normW"]) do l = isz%beg, isz%end @@ -180,7 +180,7 @@ contains elseif (id == 3) then - $:parallel_loop(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & "normW"]) do l = isz%beg, isz%end @@ -244,7 +244,7 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -254,7 +254,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -265,7 +265,7 @@ contains end do if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -276,7 +276,7 @@ contains end do end if - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 5bafcd96ed..7ee08b1223 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -377,7 +377,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -392,7 +392,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -409,7 +409,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -479,7 +479,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -494,7 +494,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -511,7 +511,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -551,7 +551,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -566,7 +566,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -584,7 +584,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -661,7 +661,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -676,7 +676,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -693,7 +693,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -733,7 +733,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -748,7 +748,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -766,7 +766,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -806,7 +806,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -821,7 +821,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -839,7 +839,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:parallel_loop(collapse=5) + $:PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -981,7 +981,7 @@ contains idwint, & gm_alpha_qp%vf) - $:parallel_loop(collapse=3, private=["vel", "alpha", "Re"]) + $:PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m @@ -1024,7 +1024,7 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index db40bbaaf2..244230c01f 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -77,7 +77,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -89,7 +89,7 @@ contains end do end do if (shear_stress) then ! Shear stresses - $:parallel_loop(collapse=3, private=["alpha_visc", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 @@ -197,7 +197,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:parallel_loop(collapse=3, private=["alpha_visc", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 @@ -302,7 +302,7 @@ contains if (p == 0) return if (shear_stress) then ! Shear stresses - $:parallel_loop(collapse=3, private=["alpha_visc", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 @@ -411,7 +411,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:parallel_loop(collapse=3, private=["alpha_visc", & + $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 @@ -593,7 +593,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -608,7 +608,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -625,7 +625,7 @@ contains if (n > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -640,7 +640,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -655,7 +655,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -674,7 +674,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -694,7 +694,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end @@ -714,7 +714,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 @@ -736,7 +736,7 @@ contains if (p > 0) then - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -752,7 +752,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -768,7 +768,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -789,7 +789,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -810,7 +810,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -831,7 +831,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -852,7 +852,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -873,7 +873,7 @@ contains end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -893,7 +893,7 @@ contains end do end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -913,7 +913,7 @@ contains end do end do end do - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1023,7 +1023,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1035,7 +1035,7 @@ contains end do end do elseif (norm_dir == 3) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1047,7 +1047,7 @@ contains end do end do elseif (norm_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1122,7 +1122,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1134,7 +1134,7 @@ contains end do end do elseif (norm_dir == 3) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1146,7 +1146,7 @@ contains end do end do elseif (norm_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1210,7 +1210,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1238,7 +1238,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end @@ -1266,7 +1266,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1321,7 +1321,7 @@ contains !$acc update device(is1_viscous, is2_viscous, is3_viscous) - $:parallel_loop(collapse=3) + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1358,7 +1358,7 @@ contains end do end if - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(idwbuff(1)%beg, k, l) = & @@ -1370,7 +1370,7 @@ contains end do end do if (n > 0) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, idwbuff(2)%beg, l) = & @@ -1382,7 +1382,7 @@ contains end do end do if (p > 0) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, idwbuff(3)%beg) = & @@ -1397,7 +1397,7 @@ contains end if if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & @@ -1406,7 +1406,7 @@ contains end do end if if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & @@ -1416,7 +1416,7 @@ contains end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & @@ -1425,7 +1425,7 @@ contains end do end if if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & @@ -1435,7 +1435,7 @@ contains end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, 0) = & @@ -1445,7 +1445,7 @@ contains end do end if if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then - $:parallel_loop(collapse=2) + $:PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, p) = & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index e390192f6b..ad9551e096 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1138,7 +1138,7 @@ contains !$acc update device(v_size) if (weno_dir == 1) then - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1176,7 +1176,7 @@ contains end if else #endif - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1206,7 +1206,7 @@ contains end block else #endif - $:parallel_loop(collapse=4) + $:PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end From 5922cb2cb3c3548c39b1d77d8390b59f97526376 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 5 Jun 2025 19:32:15 -0400 Subject: [PATCH 11/75] Fixed issues with macro --- src/common/include/macros.fpp | 76 ++++++++++++++++----------- src/common/m_finite_differences.fpp | 4 +- src/common/m_variables_conversion.fpp | 2 +- src/simulation/m_cbc.fpp | 2 +- 4 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 180cf83c71..5e68d30b46 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -116,13 +116,13 @@ $:ASSERT_LIST(clause_list, str) #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' #:else - #:set clause_str = "" + #:set clause_str = '' #:endif $:clause_str #:enddef #:def GEN_COPY_STR(copy) - #:set copy_val = GEN_PARENTHESES_CLAUSE("copy", copy) + #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) $:copy_val #:enddef @@ -136,18 +136,18 @@ #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' #:endif #:else - #:set copyin_val = "" + #:set copyin_val = '' #:endif $:copyin_val #:enddef #:def GEN_COPYOUT_STR(copyout) - #:set copyout_val = GEN_PARENTHESES_CLAUSE("copyout", copyout) + #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout) $:copyout_val #:enddef #:def GEN_CREATE_STR(create) - #:set create_val = GEN_PARENTHESES_CLAUSE("create", create) + #:set create_val = GEN_PARENTHESES_CLAUSE('create', create) $:create_val #:enddef @@ -156,7 +156,7 @@ #:assert isinstance(extraArgs, str) #:set extraArgs_val = extraArgs #:else - #:set extraArgs_val = "" + #:set extraArgs_val = '' #:endif $:extraArgs_val #:enddef @@ -164,36 +164,41 @@ #:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None $:ASSERT_LIST(parallelism, str) - #:assert all((element == "gang" or element == "worker" or element == "vector" or element == "seq") for element in parallelism) - #:set parallelism_val = " ".join(parallelism) + " " + #:assert all((element == 'gang' or element == 'worker' or & + & element == 'vector' or element == 'seq') for element in parallelism) + #:set parallelism_val = ' '.join(parallelism) + ' ' #:else - #:set parallelism_val = "" + #:set parallelism_val = '' #:endif + #:set parallelism_val = parallelism_val.strip('\n') $:parallelism_val #:enddef -#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=["gang", "vector"], default="present", firstprivate=None, reduction=None, reductionOp=None, copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, extraAccArgs=None) +#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, & + & extraAccArgs=None) #:if collapse is not None #:assert isinstance(collapse, int) #:assert collapse > 1 #:set collapse_val = 'collapse(' + str(collapse) + ') ' #:else - #:set collapse_val = "" + #:set collapse_val = '' #:endif - #:set private_val = GEN_PARENTHESES_CLAUSE("private", private) + #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) #:if default is not None #:assert isinstance(default, str) - #:assert (default == "present" or default == "none") + #:assert (default == 'present' or default == 'none') #:set default_val = 'default(' + default + ') ' #:else - #:set default_val = "" + #:set default_val = '' #:endif #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:set firstprivate_val = GEN_PARENTHESES_CLAUSE("firstprivate", firstprivate) + #:set firstprivate_val = GEN_PARENTHESES_CLAUSE('firstprivate', firstprivate) #:if reduction is not None and reductionOp is not None #:if isinstance(reduction, list) and isinstance(reductionOp, list) @@ -203,18 +208,18 @@ #:assert all(type(element) == str for sublist in reduction for element in sublist) #:assert len(reduction) == len(reductionOp) #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] - #:set reduction_val = " ". join(reduction_list) + " " + #:set reduction_val = ' '. join(reduction_list) + ' ' #:elif isinstance(reduction, list) and isinstance(reductionOp, str) $:ASSERT_LIST(reduction, str) #:assert isinstance(reductionOp, str) #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' #:else - #:stop "Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively" + #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' #:endif #:elif reduction is not None or reductionOp is not None - #:stop "Cannot set the reduction list or reduction operation without setting the other" + #:stop 'Cannot set the reduction list or reduction operation without setting the other' #:else - #:set reduction_val = "" + #:set reduction_val = '' #:endif #:set copy_val = GEN_COPY_STR(copy) @@ -227,25 +232,31 @@ #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val + parallelism_val + default_val + private_val + firstprivate_val + reduction_val + copy_val + copyin_val + copyout_val + create_val - #:set acc_directive = '!$acc parallel loop ' + clause_val + extraAccArgs_val + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + & + & firstprivate_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + #:set acc_directive = '!$acc parallel loop ' + & + & clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef -#:def routine(parallelism=["seq"], nohost=False, extraAccArgs=None) +#:def routine(parallelism=['seq'], nohost=False, extraAccArgs=None) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:assert isinstance(nohost, bool) #:if nohost == True - #:set nohost_val = "nohost" + #:set nohost_val = 'nohost' #:else - #:set nohost_val = "" + #:set nohost_val = '' #:endif #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = parallelism_val + nohost_val - #:set acc_directive = '!$acc routine ' + clause_val + extraAccArgs_val + #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') + #:set acc_directive = '!$acc routine ' + & + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef @@ -260,15 +271,18 @@ #:set create_val = GEN_CREATE_STR(create) - #:set present_val = GEN_PARENTHESES_CLAUSE("present", present) + #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) - #:set deviceptr_val = GEN_PARENTHESES_CLAUSE("deviceptr", deviceptr) + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) - #:set link_val = GEN_PARENTHESES_CLAUSE("link", link) + #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copy_val + copyin_val + copyout_val + create_val + present_val + deviceptr_val + link_val - #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & present_val.strip('\n') + deviceptr_val.strip('\n') + & + & link_val.strip('\n') + #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index d8f3c0f54a..ab3f331aad 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -1,3 +1,5 @@ +#:include 'macros.fpp' + module m_finite_differences use m_global_parameters @@ -16,7 +18,7 @@ contains real(wp) :: divergence - !$acc parallel loop collapse(3) private(divergence) + $:PARALLEL_LOOP(collapse=3, private=["divergence"]) do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 0f84d45222..c6c662ce81 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -885,7 +885,7 @@ contains $:PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & - "dyn_pres_K", "rhoYks", "B"]) + "dyn_pres_K", "rhoYks", "B"]) do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 0235f71584..ebeef9485d 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -774,7 +774,7 @@ contains ! FD2 or FD4 of RHS at j = 0 $:PARALLEL_LOOP(collapse=2, private=["alpha_rho", "vel", "adv", & "mf", "dvel_ds", "dadv_ds", "Re_cbc", "dalpha_rho_ds","dvel_dt", & - "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & + "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & "dYs_ds", "h_k", "Cp_i", "Gamma_i", "Xs"]) do r = is3%beg, is3%end do k = is2%beg, is2%end From d59e15e2d8e39032e624f81c4719d092ee0c87b5 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 9 Jun 2025 14:19:56 -0400 Subject: [PATCH 12/75] Added loop macro --- src/common/include/macros.fpp | 62 ++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 5e68d30b46..a2b141c77f 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -242,7 +242,7 @@ $:acc_directive #:enddef -#:def routine(parallelism=['seq'], nohost=False, extraAccArgs=None) +#:def ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:assert isinstance(nohost, bool) @@ -256,13 +256,11 @@ #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') #:set acc_directive = '!$acc routine ' + & - clause_val + extraAccArgs_val.strip('\n') + & clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef - - -#:def declare(copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) +#:def DECLARE(copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) #:set copy_val = GEN_COPY_STR(copy) #:set copyin_val = GEN_COPYIN_STR(copyin, copyinReadOnly) @@ -286,3 +284,57 @@ #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef + +#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction_list=None, reductionOp=None, private=None, extraAccArgs=None) + #:if collapse is not None + #:assert isinstance(collapse, int) + #:assert collapse > 1 + #:set collapse_val = 'collapse(' + str(collapse) + ') ' + #:else + #:set collapse_val = '' + #:endif + + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + + #:if data_dependency is not None + #:assert isinstance(data_dependency, str) + #:assert (data_dependency == 'auto' or data_dependency == 'independent') + #:set data_dependency_val = data_dependency + #:else + #:set data_dependency_val = '' + #:endif + + #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) + + #:if reduction is not None and reductionOp is not None + #:if isinstance(reduction, list) and isinstance(reductionOp, list) + $:ASSERT_LIST(reduction, list) + $:ASSERT_LIST(reductionOp, str) + #:assert all(len(element) != 0 for element in reduction) + #:assert all(type(element) == str for sublist in reduction for element in sublist) + #:assert len(reduction) == len(reductionOp) + #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] + #:set reduction_val = ' '. join(reduction_list) + ' ' + #:elif isinstance(reduction, list) and isinstance(reductionOp, str) + $:ASSERT_LIST(reduction, str) + #:assert isinstance(reductionOp, str) + #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' + #:else + #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' + #:endif + #:elif reduction is not None or reductionOp is not None + #:stop 'Cannot set the reduction list or reduction operation without setting the other' + #:else + #:set reduction_val = '' + #:endif + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & data_dependency_val.strip('\n') + private_val.strip('\n') + & + & reduction_val.strip('\n') + #:set acc_directive = '!$acc serial ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef \ No newline at end of file From ef035a84ef605b0df97dbe7c1fc2caf34a806a64 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 10 Jun 2025 18:06:41 -0400 Subject: [PATCH 13/75] Added rest of directive macros --- src/common/include/macros.fpp | 271 ++++++++++++++++++++++++++-------- 1 file changed, 213 insertions(+), 58 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index a2b141c77f..d79960d87f 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -121,16 +121,26 @@ $:clause_str #:enddef +#:def GEN_PRIVATE_STR(private, initalized_values) + #:assert isinstance(initalized_values, bool) + #:if initalized_values == True + #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) + #:else + #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) + #:endif + $:private_val +#:enddef + #:def GEN_COPY_STR(copy) #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) $:copy_val #:enddef -#:def GEN_COPYIN_STR(copyin, copyinReadOnly) +#:def GEN_COPYIN_STR(copyin, readonly) + #:assert isinstance(readonly, bool) #:if copyin is not None $:ASSERT_LIST(copyin, str) - #:assert isinstance(copyinReadOnly, bool) - #:if copyinReadOnly == True + #:if readonly == True #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' #:else #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' @@ -151,6 +161,41 @@ $:create_val #:enddef +#:def GEN_NOCREATE_STR(no_create) + #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create) + $:nocreate_val +#:enddef + +#:def GEN_DELETE_STR(delete) + #:set delete_val = GEN_PARENTHESES_CLAUSE('delete', delete) + $:delete_val +#:enddef + +#:def GEN_PRESENT_STR(present) + #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) + $:present_val +#:enddef + +#:def GEN_DEVICEPTR_STR(deviceptr) + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) + $:deviceptr_val +#:enddef + +#:def GEN_ATTACH_STR(attach) + #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach) + $:attach_val +#:enddef + +#:def GEN_DETACH_STR(detach) + #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach) + $:detach_val +#:enddef + +#:def GEN_LINK_STR(link) + #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) + $:link_val +#:enddef + #:def GEN_EXTRA_ARGS_STR(extraArgs) #:if extraArgs is not None #:assert isinstance(extraArgs, str) @@ -170,14 +215,10 @@ #:else #:set parallelism_val = '' #:endif - #:set parallelism_val = parallelism_val.strip('\n') $:parallelism_val #:enddef -#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, & - & extraAccArgs=None) +#:def GEN_COLLAPSE_STR(collapse) #:if collapse is not None #:assert isinstance(collapse, int) #:assert collapse > 1 @@ -185,9 +226,10 @@ #:else #:set collapse_val = '' #:endif + $:collapse_val +#:enddef - #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) - +#:def GEN_DEFAULT_STR(default) #:if default is not None #:assert isinstance(default, str) #:assert (default == 'present' or default == 'none') @@ -195,17 +237,16 @@ #:else #:set default_val = '' #:endif + $:default_val +#:enddef - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - - #:set firstprivate_val = GEN_PARENTHESES_CLAUSE('firstprivate', firstprivate) - +#:def GEN_REDUCTION_STR(reduction, reductionOp) #:if reduction is not None and reductionOp is not None #:if isinstance(reduction, list) and isinstance(reductionOp, list) $:ASSERT_LIST(reduction, list) $:ASSERT_LIST(reductionOp, str) #:assert all(len(element) != 0 for element in reduction) - #:assert all(type(element) == str for sublist in reduction for element in sublist) + #:assert all(isinstance(element, str) for sublist in reduction for element in sublist) #:assert len(reduction) == len(reductionOp) #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] #:set reduction_val = ' '. join(reduction_list) + ' ' @@ -221,22 +262,59 @@ #:else #:set reduction_val = '' #:endif + $:reduction_val +#:enddef - #:set copy_val = GEN_COPY_STR(copy) +#:def GEN_HOST_STR(host) + #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) + $:host_val +#:enddef + +#:def GEN_DEVICE_STR(device) + #:set device_val = GEN_PARENTHESES_CLAUSE('device', device) + $:device_val +#:enddef + + +#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:set default_val = GEN_DEFAULT_STR(default) - #:set copyin_val = GEN_COPYIN_STR(copyin, copyinReadOnly) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + + #:set present_val = GEN_PRESENT_STR(present) + + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + + #:set attach_val = GEN_ATTACH_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + & - & firstprivate_val.strip('\n') + reduction_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') #:set acc_directive = '!$acc parallel loop ' + & & clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -260,20 +338,21 @@ $:acc_directive #:enddef -#:def DECLARE(copy=None, copyin=None, copyinReadOnly=False, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) +#:def DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) + #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, copyinReadOnly) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') #:set copyout_val = GEN_COPYOUT_STR(copyout) #:set create_val = GEN_CREATE_STR(create) - #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) + #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) + #:set link_val = GEN_LINK_STR(link) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) @@ -285,17 +364,11 @@ $:acc_directive #:enddef -#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction_list=None, reductionOp=None, private=None, extraAccArgs=None) - #:if collapse is not None - #:assert isinstance(collapse, int) - #:assert collapse > 1 - #:set collapse_val = 'collapse(' + str(collapse) + ') ' - #:else - #:set collapse_val = '' - #:endif +#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:if data_dependency is not None #:assert isinstance(data_dependency, str) @@ -305,36 +378,118 @@ #:set data_dependency_val = '' #:endif - #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) + #:set private_val = GEN_PRIVATE_STR(private, False) - #:if reduction is not None and reductionOp is not None - #:if isinstance(reduction, list) and isinstance(reductionOp, list) - $:ASSERT_LIST(reduction, list) - $:ASSERT_LIST(reductionOp, str) - #:assert all(len(element) != 0 for element in reduction) - #:assert all(type(element) == str for sublist in reduction for element in sublist) - #:assert len(reduction) == len(reductionOp) - #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] - #:set reduction_val = ' '. join(reduction_list) + ' ' - #:elif isinstance(reduction, list) and isinstance(reductionOp, str) - $:ASSERT_LIST(reduction, str) - #:assert isinstance(reductionOp, str) - #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' - #:else - #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' - #:endif - #:elif reduction is not None or reductionOp is not None - #:stop 'Cannot set the reduction list or reduction operation without setting the other' - #:else - #:set reduction_val = '' - #:endif + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & & data_dependency_val.strip('\n') + private_val.strip('\n') + & & reduction_val.strip('\n') - #:set acc_directive = '!$acc serial ' + & + #:set acc_directive = '!$acc loop ' + & & clause_val + extraAccArgs_val.strip('\n') $:acc_directive -#:enddef \ No newline at end of file +#:enddef + +#:def DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) + + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) + + #:set no_create_val = GEN_NOCREATE_STR(no_create) + + #:set present_val = GEN_PRESENT_STR(present) + + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + + #:set attach_val = GEN_ATTACH_STR(attach) + + #:set default_val = GEN_DEFAULT_STR(default) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + & + & default_val.strip('\n') + #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set create_val = GEN_CREATE_STR(create) + + #:set attach_val = GEN_ATTACH_STR(attach) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set delete_val = GEN_DELETE_STR(delete) + + #:set detach_val = GEN_DETACH_STR(detach) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') + #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def CACHE(cache, extraAccArgs=None) + #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = cache_val.strip('\n') + #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ATOMIC(atomic='update', extraAccArgs=None) + #:assert isinstance(atomic, str) + #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') + + #:set atomic_val = atomic + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = atomic_val.strip('\n') + #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def UPDATE(host=None, device=None, extraAccArgs=None) + #:set host_val = GEN_HOST_STR(host) + + #:set device_val = GEN_DEVICE_STR(device) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def WAIT(host=None, device=None, extraAccArgs=None) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef From c4332d813f377f1533a5611cd75ce707446a65d6 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 10 Jun 2025 19:56:31 -0400 Subject: [PATCH 14/75] Moved directive macros to seperate file --- src/common/include/directive_macros.fpp | 391 +++++++++++++++++++++++ src/common/include/macros.fpp | 392 +----------------------- src/common/m_boundary_common.fpp | 1 + src/common/m_chemistry.fpp | 1 + src/common/m_finite_differences.fpp | 1 + src/common/m_phase_change.fpp | 1 + src/common/m_variables_conversion.fpp | 1 + src/simulation/m_acoustic_src.fpp | 1 + src/simulation/m_body_forces.fpp | 1 + src/simulation/m_bubbles_EE.fpp | 1 + src/simulation/m_bubbles_EL.fpp | 1 + src/simulation/m_cbc.fpp | 1 + src/simulation/m_data_output.fpp | 1 + src/simulation/m_fftw.fpp | 1 + src/simulation/m_hyperelastic.fpp | 1 + src/simulation/m_hypoelastic.fpp | 1 + src/simulation/m_mhd.fpp | 1 + src/simulation/m_qbmm.fpp | 1 + src/simulation/m_rhs.fpp | 1 + src/simulation/m_riemann_solvers.fpp | 1 + src/simulation/m_surface_tension.fpp | 1 + src/simulation/m_time_steppers.fpp | 1 + src/simulation/m_viscous.fpp | 1 + src/simulation/m_weno.fpp | 1 + 24 files changed, 414 insertions(+), 391 deletions(-) create mode 100644 src/common/include/directive_macros.fpp diff --git a/src/common/include/directive_macros.fpp b/src/common/include/directive_macros.fpp new file mode 100644 index 0000000000..2760fd1e9e --- /dev/null +++ b/src/common/include/directive_macros.fpp @@ -0,0 +1,391 @@ +#:def ASSERT_LIST(data, datatype) + #:assert data is not None + #:assert isinstance(data, list) + #:assert len(data) != 0 + #:assert all(isinstance(element, datatype) for element in data) +#:enddef + +#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_list) + #:assert isinstance(clause_name, str) + #:if clause_list is not None + $:ASSERT_LIST(clause_list, str) + #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' + #:else + #:set clause_str = '' + #:endif + $:clause_str +#:enddef + +#:def GEN_PRIVATE_STR(private, initalized_values) + #:assert isinstance(initalized_values, bool) + #:if initalized_values == True + #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) + #:else + #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) + #:endif + $:private_val +#:enddef + +#:def GEN_COPY_STR(copy) + #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) + $:copy_val +#:enddef + +#:def GEN_COPYIN_STR(copyin, readonly) + #:assert isinstance(readonly, bool) + #:if copyin is not None + $:ASSERT_LIST(copyin, str) + #:if readonly == True + #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' + #:else + #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' + #:endif + #:else + #:set copyin_val = '' + #:endif + $:copyin_val +#:enddef + +#:def GEN_COPYOUT_STR(copyout) + #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout) + $:copyout_val +#:enddef + +#:def GEN_CREATE_STR(create) + #:set create_val = GEN_PARENTHESES_CLAUSE('create', create) + $:create_val +#:enddef + +#:def GEN_NOCREATE_STR(no_create) + #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create) + $:nocreate_val +#:enddef + +#:def GEN_DELETE_STR(delete) + #:set delete_val = GEN_PARENTHESES_CLAUSE('delete', delete) + $:delete_val +#:enddef + +#:def GEN_PRESENT_STR(present) + #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) + $:present_val +#:enddef + +#:def GEN_DEVICEPTR_STR(deviceptr) + #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) + $:deviceptr_val +#:enddef + +#:def GEN_ATTACH_STR(attach) + #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach) + $:attach_val +#:enddef + +#:def GEN_DETACH_STR(detach) + #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach) + $:detach_val +#:enddef + +#:def GEN_LINK_STR(link) + #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) + $:link_val +#:enddef + +#:def GEN_EXTRA_ARGS_STR(extraArgs) + #:if extraArgs is not None + #:assert isinstance(extraArgs, str) + #:set extraArgs_val = extraArgs + #:else + #:set extraArgs_val = '' + #:endif + $:extraArgs_val +#:enddef + +#:def GEN_PARALLELISM_STR(parallelism) + #:if parallelism is not None + $:ASSERT_LIST(parallelism, str) + #:assert all((element == 'gang' or element == 'worker' or & + & element == 'vector' or element == 'seq') for element in parallelism) + #:set parallelism_val = ' '.join(parallelism) + ' ' + #:else + #:set parallelism_val = '' + #:endif + $:parallelism_val +#:enddef + +#:def GEN_COLLAPSE_STR(collapse) + #:if collapse is not None + #:assert isinstance(collapse, int) + #:assert collapse > 1 + #:set collapse_val = 'collapse(' + str(collapse) + ') ' + #:else + #:set collapse_val = '' + #:endif + $:collapse_val +#:enddef + +#:def GEN_DEFAULT_STR(default) + #:if default is not None + #:assert isinstance(default, str) + #:assert (default == 'present' or default == 'none') + #:set default_val = 'default(' + default + ') ' + #:else + #:set default_val = '' + #:endif + $:default_val +#:enddef + +#:def GEN_REDUCTION_STR(reduction, reductionOp) + #:if reduction is not None and reductionOp is not None + #:if isinstance(reduction, list) and isinstance(reductionOp, list) + $:ASSERT_LIST(reduction, list) + $:ASSERT_LIST(reductionOp, str) + #:assert all(len(element) != 0 for element in reduction) + #:assert all(isinstance(element, str) for sublist in reduction for element in sublist) + #:assert len(reduction) == len(reductionOp) + #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] + #:set reduction_val = ' '. join(reduction_list) + ' ' + #:elif isinstance(reduction, list) and isinstance(reductionOp, str) + $:ASSERT_LIST(reduction, str) + #:assert isinstance(reductionOp, str) + #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' + #:else + #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' + #:endif + #:elif reduction is not None or reductionOp is not None + #:stop 'Cannot set the reduction list or reduction operation without setting the other' + #:else + #:set reduction_val = '' + #:endif + $:reduction_val +#:enddef + +#:def GEN_HOST_STR(host) + #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) + $:host_val +#:enddef + +#:def GEN_DEVICE_STR(device) + #:set device_val = GEN_PARENTHESES_CLAUSE('device', device) + $:device_val +#:enddef + + +#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:set default_val = GEN_DEFAULT_STR(default) + + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) + + #:set no_create_val = GEN_NOCREATE_STR(no_create) + + #:set present_val = GEN_PRESENT_STR(present) + + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + + #:set attach_val = GEN_ATTACH_STR(attach) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:assert isinstance(nohost, bool) + #:if nohost == True + #:set nohost_val = 'nohost' + #:else + #:set nohost_val = '' + #:endif + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') + #:set acc_directive = '!$acc routine ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) + + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) + + #:set present_val = GEN_PRESENT_STR(present) + + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + + #:set link_val = GEN_LINK_STR(link) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & present_val.strip('\n') + deviceptr_val.strip('\n') + & + & link_val.strip('\n') + #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + + #:if data_dependency is not None + #:assert isinstance(data_dependency, str) + #:assert (data_dependency == 'auto' or data_dependency == 'independent') + #:set data_dependency_val = data_dependency + #:else + #:set data_dependency_val = '' + #:endif + + #:set private_val = GEN_PRIVATE_STR(private, False) + + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & data_dependency_val.strip('\n') + private_val.strip('\n') + & + & reduction_val.strip('\n') + #:set acc_directive = '!$acc loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) + + #:set copy_val = GEN_COPY_STR(copy) + + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set create_val = GEN_CREATE_STR(create) + + #:set no_create_val = GEN_NOCREATE_STR(no_create) + + #:set present_val = GEN_PRESENT_STR(present) + + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + + #:set attach_val = GEN_ATTACH_STR(attach) + + #:set default_val = GEN_DEFAULT_STR(default) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + & + & default_val.strip('\n') + #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + + #:set create_val = GEN_CREATE_STR(create) + + #:set attach_val = GEN_ATTACH_STR(attach) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) + #:set copyout_val = GEN_COPYOUT_STR(copyout) + + #:set delete_val = GEN_DELETE_STR(delete) + + #:set detach_val = GEN_DETACH_STR(detach) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') + #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def CACHE(cache, extraAccArgs=None) + #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = cache_val.strip('\n') + #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def ATOMIC(atomic='update', extraAccArgs=None) + #:assert isinstance(atomic, str) + #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') + + #:set atomic_val = atomic + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = atomic_val.strip('\n') + #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def UPDATE(host=None, device=None, extraAccArgs=None) + #:set host_val = GEN_HOST_STR(host) + + #:set device_val = GEN_DEVICE_STR(device) + + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + +#:def WAIT(host=None, device=None, extraAccArgs=None) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = host_val.strip('\n') + device_val.strip('\n') + #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef +! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index d79960d87f..679c276bf4 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -102,394 +102,4 @@ //${message or '"No error description."'}$) end if #:enddef - -#:def ASSERT_LIST(data, datatype) - #:assert data is not None - #:assert isinstance(data, list) - #:assert len(data) != 0 - #:assert all(isinstance(element, datatype) for element in data) -#:enddef - -#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_list) - #:assert isinstance(clause_name, str) - #:if clause_list is not None - $:ASSERT_LIST(clause_list, str) - #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' - #:else - #:set clause_str = '' - #:endif - $:clause_str -#:enddef - -#:def GEN_PRIVATE_STR(private, initalized_values) - #:assert isinstance(initalized_values, bool) - #:if initalized_values == True - #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) - #:else - #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) - #:endif - $:private_val -#:enddef - -#:def GEN_COPY_STR(copy) - #:set copy_val = GEN_PARENTHESES_CLAUSE('copy', copy) - $:copy_val -#:enddef - -#:def GEN_COPYIN_STR(copyin, readonly) - #:assert isinstance(readonly, bool) - #:if copyin is not None - $:ASSERT_LIST(copyin, str) - #:if readonly == True - #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' - #:else - #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' - #:endif - #:else - #:set copyin_val = '' - #:endif - $:copyin_val -#:enddef - -#:def GEN_COPYOUT_STR(copyout) - #:set copyout_val = GEN_PARENTHESES_CLAUSE('copyout', copyout) - $:copyout_val -#:enddef - -#:def GEN_CREATE_STR(create) - #:set create_val = GEN_PARENTHESES_CLAUSE('create', create) - $:create_val -#:enddef - -#:def GEN_NOCREATE_STR(no_create) - #:set nocreate_val = GEN_PARENTHESES_CLAUSE('no_create', no_create) - $:nocreate_val -#:enddef - -#:def GEN_DELETE_STR(delete) - #:set delete_val = GEN_PARENTHESES_CLAUSE('delete', delete) - $:delete_val -#:enddef - -#:def GEN_PRESENT_STR(present) - #:set present_val = GEN_PARENTHESES_CLAUSE('present', present) - $:present_val -#:enddef - -#:def GEN_DEVICEPTR_STR(deviceptr) - #:set deviceptr_val = GEN_PARENTHESES_CLAUSE('deviceptr', deviceptr) - $:deviceptr_val -#:enddef - -#:def GEN_ATTACH_STR(attach) - #:set attach_val = GEN_PARENTHESES_CLAUSE('attach', attach) - $:attach_val -#:enddef - -#:def GEN_DETACH_STR(detach) - #:set detach_val = GEN_PARENTHESES_CLAUSE('detach', detach) - $:detach_val -#:enddef - -#:def GEN_LINK_STR(link) - #:set link_val = GEN_PARENTHESES_CLAUSE('link', link) - $:link_val -#:enddef - -#:def GEN_EXTRA_ARGS_STR(extraArgs) - #:if extraArgs is not None - #:assert isinstance(extraArgs, str) - #:set extraArgs_val = extraArgs - #:else - #:set extraArgs_val = '' - #:endif - $:extraArgs_val -#:enddef - -#:def GEN_PARALLELISM_STR(parallelism) - #:if parallelism is not None - $:ASSERT_LIST(parallelism, str) - #:assert all((element == 'gang' or element == 'worker' or & - & element == 'vector' or element == 'seq') for element in parallelism) - #:set parallelism_val = ' '.join(parallelism) + ' ' - #:else - #:set parallelism_val = '' - #:endif - $:parallelism_val -#:enddef - -#:def GEN_COLLAPSE_STR(collapse) - #:if collapse is not None - #:assert isinstance(collapse, int) - #:assert collapse > 1 - #:set collapse_val = 'collapse(' + str(collapse) + ') ' - #:else - #:set collapse_val = '' - #:endif - $:collapse_val -#:enddef - -#:def GEN_DEFAULT_STR(default) - #:if default is not None - #:assert isinstance(default, str) - #:assert (default == 'present' or default == 'none') - #:set default_val = 'default(' + default + ') ' - #:else - #:set default_val = '' - #:endif - $:default_val -#:enddef - -#:def GEN_REDUCTION_STR(reduction, reductionOp) - #:if reduction is not None and reductionOp is not None - #:if isinstance(reduction, list) and isinstance(reductionOp, list) - $:ASSERT_LIST(reduction, list) - $:ASSERT_LIST(reductionOp, str) - #:assert all(len(element) != 0 for element in reduction) - #:assert all(isinstance(element, str) for sublist in reduction for element in sublist) - #:assert len(reduction) == len(reductionOp) - #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] - #:set reduction_val = ' '. join(reduction_list) + ' ' - #:elif isinstance(reduction, list) and isinstance(reductionOp, str) - $:ASSERT_LIST(reduction, str) - #:assert isinstance(reductionOp, str) - #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' - #:else - #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' - #:endif - #:elif reduction is not None or reductionOp is not None - #:stop 'Cannot set the reduction list or reduction operation without setting the other' - #:else - #:set reduction_val = '' - #:endif - $:reduction_val -#:enddef - -#:def GEN_HOST_STR(host) - #:set host_val = GEN_PARENTHESES_CLAUSE('host', host) - $:host_val -#:enddef - -#:def GEN_DEVICE_STR(device) - #:set device_val = GEN_PARENTHESES_CLAUSE('device', device) - $:device_val -#:enddef - - -#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - - #:set default_val = GEN_DEFAULT_STR(default) - - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - - #:set copy_val = GEN_COPY_STR(copy) - - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - - #:set copyout_val = GEN_COPYOUT_STR(copyout) - - #:set create_val = GEN_CREATE_STR(create) - - #:set no_create_val = GEN_NOCREATE_STR(no_create) - - #:set present_val = GEN_PRESENT_STR(present) - - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - - #:set attach_val = GEN_ATTACH_STR(attach) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc parallel loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - - #:assert isinstance(nohost, bool) - #:if nohost == True - #:set nohost_val = 'nohost' - #:else - #:set nohost_val = '' - #:endif - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') - #:set acc_directive = '!$acc routine ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) - - #:set copy_val = GEN_COPY_STR(copy) - - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - - #:set copyout_val = GEN_COPYOUT_STR(copyout) - - #:set create_val = GEN_CREATE_STR(create) - - #:set present_val = GEN_PRESENT_STR(present) - - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - - #:set link_val = GEN_LINK_STR(link) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & present_val.strip('\n') + deviceptr_val.strip('\n') + & - & link_val.strip('\n') - #:set acc_directive = '!$acc declare ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) - - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - - #:if data_dependency is not None - #:assert isinstance(data_dependency, str) - #:assert (data_dependency == 'auto' or data_dependency == 'independent') - #:set data_dependency_val = data_dependency - #:else - #:set data_dependency_val = '' - #:endif - - #:set private_val = GEN_PRIVATE_STR(private, False) - - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & data_dependency_val.strip('\n') + private_val.strip('\n') + & - & reduction_val.strip('\n') - #:set acc_directive = '!$acc loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) - - #:set copy_val = GEN_COPY_STR(copy) - - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - - #:set copyout_val = GEN_COPYOUT_STR(copyout) - - #:set create_val = GEN_CREATE_STR(create) - - #:set no_create_val = GEN_NOCREATE_STR(no_create) - - #:set present_val = GEN_PRESENT_STR(present) - - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - - #:set attach_val = GEN_ATTACH_STR(attach) - - #:set default_val = GEN_DEFAULT_STR(default) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') + & - & default_val.strip('\n') - #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - - #:set create_val = GEN_CREATE_STR(create) - - #:set attach_val = GEN_ATTACH_STR(attach) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) - #:set copyout_val = GEN_COPYOUT_STR(copyout) - - #:set delete_val = GEN_DELETE_STR(delete) - - #:set detach_val = GEN_DETACH_STR(detach) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') - #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def CACHE(cache, extraAccArgs=None) - #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = cache_val.strip('\n') - #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def ATOMIC(atomic='update', extraAccArgs=None) - #:assert isinstance(atomic, str) - #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') - - #:set atomic_val = atomic - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = atomic_val.strip('\n') - #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def UPDATE(host=None, device=None, extraAccArgs=None) - #:set host_val = GEN_HOST_STR(host) - - #:set device_val = GEN_DEVICE_STR(device) - - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef - -#:def WAIT(host=None, device=None, extraAccArgs=None) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - - #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') - $:acc_directive -#:enddef +! New line at end of file is required for FYPP \ No newline at end of file diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 3b8732c593..eed3cc5086 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -6,6 +6,7 @@ !! boundary condiitons #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_boundary_common diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 416ecc63cc..e7c42b36f6 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -4,6 +4,7 @@ !! @author Henry Le Berre #:include 'macros.fpp' +#:include 'directive_macros.fpp' #:include 'case.fpp' module m_chemistry diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index ab3f331aad..18a0a84484 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -1,4 +1,5 @@ #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_finite_differences diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 48a2b748a0..ba911005bf 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -2,6 +2,7 @@ !> procedure. #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_phase_change diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index c6c662ce81..024934df1e 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_variables_conversion #:include 'macros.fpp' +#:include 'directive_macros.fpp' #:include 'case.fpp' !> @brief This module consists of subroutines used in the conversion of the diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index a1bc61efe8..0fa8f33566 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_acoustic_src #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module contains the subroutines used to create a acoustic source pressure source term module m_acoustic_src diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index b0daf8dd50..4080a7ddf1 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -1,4 +1,5 @@ #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_body_forces diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index ca318f8cee..dfe0f3da49 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_bubbles_EE #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module is used to compute the ensemble-averaged bubble dynamic variables module m_bubbles_EE diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5b6c818231..b745739058 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_bubbles_EL #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module is used to to compute the volume-averaged bubble model module m_bubbles_EL diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index ebeef9485d..73458d56f8 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -19,6 +19,7 @@ !! Please refer to Thompson (1987, 1990) for detailed descriptions. #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_cbc diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 3d87014ea1..ca0d7dbb8d 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_data_output #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The primary purpose of this module is to output the grid and the !! conservative variables data at the chosen time-step interval. In diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 0a32961dce..10e063496c 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_fftw #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module contains the subroutines for the FFT routines module m_fftw diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index e1a5d34a27..cd0552d438 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_hyperelastic #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module consists of subroutines used in the calculation !! of the cauchy tensor diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index b298833cf5..68032cb5b8 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_hypoelastic #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module is used to compute source terms for hypoelastic model module m_hypoelastic diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 9b87a5a3be..8e6ea6c820 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_mhd #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module is used to compute source terms for magnetohydrodynamics !! Note: not applicable for 1D diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 924a0ed758..edaf2ee367 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_qbmm #:include 'case.fpp' +#:include 'directive_macros.fpp' #:include 'macros.fpp' !> @brief This module is used to compute moment inversion via qbmm diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 87c362aaea..730ccba98b 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -4,6 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module contains the subroutines used to calculate the right- !! hane-side (RHS) in the quasi-conservative, shock- and interface- diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 594e0009e3..d82ce6b154 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -21,6 +21,7 @@ #:include 'case.fpp' #:include 'macros.fpp' +#:include 'directive_macros.fpp' #:include 'inline_riemann.fpp' module m_riemann_solvers diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 8b4d3d923f..a3be874f86 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -1,4 +1,5 @@ #:include 'macros.fpp' +#:include 'directive_macros.fpp' #:include 'inline_capillary.fpp' !> @brief This module is used to compute source terms for surface tension model diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 7ee08b1223..7c1c986ac9 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_time_steppers #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The following module features a variety of time-stepping schemes. !! Currently, it includes the following Runge-Kutta (RK) algorithms: diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 244230c01f..cd48b01e50 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -2,6 +2,7 @@ !! @file m_viscous.f90 !! @brief Contains module m_viscous #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module contains the subroutines used to compute viscous terms. module m_viscous diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index ad9551e096..1915254cc3 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -2,6 +2,7 @@ !! @file m_weno.f90 !! @brief Contains module m_weno #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief Weighted essentially non-oscillatory (WENO) reconstruction scheme !! that is supplemented with monotonicity preserving bounds (MPWENO) From 25a57168b666e59654d7594746e8c127f7bcc875 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 10 Jun 2025 20:15:51 -0400 Subject: [PATCH 15/75] Fixed spelling issue --- src/common/include/directive_macros.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/include/directive_macros.fpp b/src/common/include/directive_macros.fpp index 2760fd1e9e..4e6e62f746 100644 --- a/src/common/include/directive_macros.fpp +++ b/src/common/include/directive_macros.fpp @@ -16,9 +16,9 @@ $:clause_str #:enddef -#:def GEN_PRIVATE_STR(private, initalized_values) - #:assert isinstance(initalized_values, bool) - #:if initalized_values == True +#:def GEN_PRIVATE_STR(private, initialized_values) + #:assert isinstance(initialized_values, bool) + #:if initialized_values == True #:set private_val = GEN_PARENTHESES_CLAUSE('firstprivate', private) #:else #:set private_val = GEN_PARENTHESES_CLAUSE('private', private) From e8d78e8bf408163c419148c8a9e605d8a36e8719 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 10 Jun 2025 22:16:23 -0400 Subject: [PATCH 16/75] Replaced Openacc routine directives --- src/common/m_boundary_common.fpp | 22 ++++++------ src/common/m_helper.fpp | 5 +-- ...{m_helper_basic.f90 => m_helper_basic.fpp} | 7 ++-- src/common/m_phase_change.fpp | 12 +++---- src/common/m_variables_conversion.fpp | 10 +++--- src/pre_process/m_assign_variables.fpp | 5 +-- src/pre_process/m_patches.fpp | 9 ++--- src/simulation/m_acoustic_src.fpp | 6 ++-- src/simulation/m_bubbles.fpp | 35 ++++++++++--------- src/simulation/m_bubbles_EL.fpp | 4 +-- src/simulation/m_bubbles_EL_kernels.fpp | 13 +++---- src/simulation/m_compute_cbc.fpp | 17 ++++----- src/simulation/m_hyperelastic.fpp | 4 +-- src/simulation/m_ibm.fpp | 3 +- src/simulation/m_qbmm.fpp | 12 +++---- src/simulation/m_riemann_solvers.fpp | 4 +-- .../{m_sim_helpers.f90 => m_sim_helpers.fpp} | 7 ++-- 17 files changed, 92 insertions(+), 83 deletions(-) rename src/common/{m_helper_basic.f90 => m_helper_basic.fpp} (97%) rename src/simulation/{m_sim_helpers.f90 => m_sim_helpers.fpp} (99%) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index eed3cc5086..a3460a3950 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -242,7 +242,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -311,7 +311,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_symmetry #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -575,7 +575,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_periodic #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -718,7 +718,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_axis #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -782,7 +782,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -881,7 +881,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1016,7 +1016,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1085,7 +1085,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_qbmm_extrapolation #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc @@ -1284,7 +1284,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_periodic #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1342,7 +1342,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_reflective #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1424,7 +1424,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index 4be5974c2e..a73afddda0 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -1,4 +1,5 @@ #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> !! @file m_helper.f90 !! @brief Contains module m_helper @@ -43,7 +44,7 @@ contains !! @param Rtmp is the bubble radii !! @param ntmp is the output number bubble density pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: Rtmp real(wp), intent(out) :: ntmp @@ -57,7 +58,7 @@ contains end subroutine s_comp_n_from_prim pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: nRtmp real(wp), intent(out) :: ntmp diff --git a/src/common/m_helper_basic.f90 b/src/common/m_helper_basic.fpp similarity index 97% rename from src/common/m_helper_basic.f90 rename to src/common/m_helper_basic.fpp index 4205279cd4..d0ace9e551 100644 --- a/src/common/m_helper_basic.f90 +++ b/src/common/m_helper_basic.fpp @@ -1,3 +1,4 @@ +#:include 'directive_macros.fpp' !> !! @file m_helper_basic.f90 !! @brief Contains module m_helper_basic @@ -23,7 +24,7 @@ module m_helper_basic !! @param tol_input Relative error (default = 1e-6_wp). !! @return Result of the comparison. logical pure elemental function f_approx_equal(a, b, tol_input) result(res) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: a, b real(wp), optional, intent(in) :: tol_input real(wp) :: tol @@ -46,7 +47,7 @@ end function f_approx_equal !> Checks if a real(wp) variable is of default value. !! @param var Variable to check. logical pure elemental function f_is_default(var) result(res) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: var res = f_approx_equal(var, dflt_real) @@ -71,7 +72,7 @@ end function f_all_default !> Checks if a real(wp) variable is an integer. !! @param var Variable to check. logical pure elemental function f_is_integer(var) result(res) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: var res = f_approx_equal(var, real(nint(var), wp)) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index ba911005bf..fd03e5f09a 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -290,7 +290,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k #else - !$acc routine seq + $:ROUTINE() #endif ! initializing variables @@ -395,7 +395,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k #else - !$acc routine seq + $:ROUTINE() #endif integer, intent(in) :: j, k, l @@ -520,7 +520,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_correct_partial_densities #else - !$acc routine seq + $:ROUTINE() #endif !> @name variables for the correction of the reacting partial densities @@ -583,7 +583,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_jacobian_matrix #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(2, 2), intent(out) :: InvJac @@ -690,7 +690,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pTg_residue #else - !$acc routine seq + $:ROUTINE() #endif integer, intent(in) :: j, k, l @@ -741,7 +741,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_TSat #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: pSat diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 024934df1e..7523afd13d 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -121,7 +121,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pressure #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: energy, alf @@ -469,7 +469,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -552,7 +552,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -1618,7 +1618,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_speed_of_sound #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: pres @@ -1688,7 +1688,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: B(3), rho, c diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index 11eb62f9e6..1adfd6ab1f 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_assign_variables #:include 'case.fpp' +#:include 'directive_macros.fpp' module m_assign_variables @@ -103,7 +104,7 @@ contains !! @param patch_id_fp Array to track patch ids pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - !$acc routine seq + $:ROUTINE() integer, intent(in) :: patch_id integer, intent(in) :: j, k, l @@ -276,7 +277,7 @@ contains !! @param patch_id_fp Array to track patch ids impure subroutine s_assign_patch_species_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - !$acc routine seq + $:ROUTINE() integer, intent(in) :: patch_id integer, intent(in) :: j, k, l diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index 2425553d3f..584048aa02 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -7,6 +7,7 @@ #:include '2dHardcodedIC.fpp' #:include '3dHardcodedIC.fpp' #:include 'macros.fpp' +#:include 'directive_macros.fpp' module m_patches @@ -2386,7 +2387,7 @@ contains end subroutine s_model subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: cyl_y, cyl_z @@ -2397,7 +2398,7 @@ contains pure function f_convert_cyl_to_cart(cyl) result(cart) - !$acc routine seq + $:ROUTINE() t_vec3, intent(in) :: cyl t_vec3 :: cart @@ -2409,7 +2410,7 @@ contains end function f_convert_cyl_to_cart subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y) - !$acc routine seq + $:ROUTINE() real(wp), intent(IN) :: cyl_x, cyl_y @@ -2422,7 +2423,7 @@ contains !! @param offset Thickness !! @param a Starting position pure elemental function f_r(myth, offset, a) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: myth, offset, a real(wp) :: b real(wp) :: f_r diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 0fa8f33566..7c15bb5cf1 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -340,7 +340,7 @@ contains !! @param gauss_sigma_time_local sigma in time for Gaussian pulse !! @param source Source term amplitude pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) - !$acc routine seq + $:ROUTINE() integer, intent(in) :: ai, term_index real(wp), intent(in) :: sim_time, c, sum_BB real(wp), intent(in) :: frequency_local, gauss_sigma_time_local @@ -692,7 +692,7 @@ contains !! @param c Speed of sound !! @return frequency_local Converted frequency pure elemental function f_frequency_local(freq_conv_flag, ai, c) - !$acc routine seq + $:ROUTINE() logical, intent(in) :: freq_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c @@ -711,7 +711,7 @@ contains !! @param ai Acoustic source index !! @return gauss_sigma_time_local Converted Gaussian sigma time pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - !$acc routine seq + $:ROUTINE() logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index bf008b9b00..b64c28a4ee 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_bubbles #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module contains the procedures shared by the ensemble-averaged and volume-averaged bubble models. module m_bubbles @@ -39,7 +40,7 @@ contains !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu real(wp), intent(in) :: fCson @@ -80,7 +81,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw(fR0, fR, fV, fpb) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw @@ -99,7 +100,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait real(wp) :: tmp1, tmp2, tmp3 @@ -119,7 +120,7 @@ contains !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy pure elemental function f_cgas(fCpinf, fntait, fBtait, fH) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fCpinf, fntait, fBtait, fH real(wp) :: tmp @@ -142,7 +143,7 @@ contains !! @param advsrc Advection equation source term !! @param divu Divergence of velocity pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu real(wp) :: c2_liquid @@ -172,7 +173,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -208,7 +209,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fCpbw Boundary wall pressure pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fR0, fCpbw) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fCp, fRho, fR, fV, fR0, fCpbw real(wp) :: f_rddot_RP @@ -231,7 +232,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -254,7 +255,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw_KM(fR0, fR, fV, fpb) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -281,7 +282,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -315,7 +316,7 @@ contains !! @param pb Internal bubble pressure !! @param iR0 Current bubble size index pure elemental subroutine s_bwproperty(pb, iR0, chi_vw, k_mw, rho_mw) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: pb integer, intent(in) :: iR0 real(wp), intent(out) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -345,7 +346,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fR real(wp), intent(in) :: fV real(wp), intent(in) :: fpb @@ -403,7 +404,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) - !$acc routine seq + $:ROUTINE() real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -466,7 +467,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_step #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf @@ -600,7 +601,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_initial_substep_h #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -686,7 +687,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_substep #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(OUT) :: err real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf @@ -784,7 +785,7 @@ contains !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) - !$acc routine seq + $:ROUTINE() real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t integer, intent(IN) :: bub_id diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index b745739058..f83b64a3fd 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -738,7 +738,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_cson_from_pinf #else - !$acc routine seq + $:ROUTINE() #endif integer, intent(in) :: bub_id type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -809,7 +809,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else - !$acc routine seq + $:ROUTINE() #endif integer, intent(in) :: bub_id, ptype type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 47566eac19..c173c82953 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_bubbles_EL_kernels #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module contains kernel functions used to map the effect of the lagrangian bubbles !! in the Eulerian framework. @@ -203,7 +204,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_applygaussian #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: center integer, dimension(3), intent(in) :: cellaux @@ -273,7 +274,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_check_celloutside #else - !$acc routine seq + $:ROUTINE() #endif integer, dimension(3), intent(inout) :: cellaux logical, intent(out) :: celloutside @@ -309,7 +310,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc #else - !$acc routine seq + $:ROUTINE() #endif integer, dimension(3), intent(inout) :: cellaux integer, dimension(3), intent(in) :: cell @@ -350,7 +351,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_stddsv #else - !$acc routine seq + $:ROUTINE() #endif integer, dimension(3), intent(in) :: cell real(wp), intent(in) :: volpart @@ -391,7 +392,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_char_vol #else - !$acc routine seq + $:ROUTINE() #endif integer, intent(in) :: cellx, celly, cellz real(wp), intent(out) :: Charvol @@ -416,7 +417,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: s_cell integer, dimension(3), intent(out) :: get_cell diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 275acb7f6f..23136a0198 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -1,3 +1,4 @@ +#:include 'directive_macros.fpp' !> !! @file m_compute_cbc.f90 !! @brief Contains module m_compute_cbc @@ -27,7 +28,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -57,7 +58,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -107,7 +108,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -141,7 +142,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -190,7 +191,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -228,7 +229,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -267,7 +268,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -298,7 +299,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index cd0552d438..487e0ceb64 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -221,7 +221,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_neoHookean_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - !$acc routine seq + $:ROUTINE() type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G @@ -260,7 +260,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - !$acc routine seq + $:ROUTINE() type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 420a024545..31fd99fe99 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_ibm #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief This module is used to handle all operations related to immersed !! boundary methods (IBMs) @@ -744,7 +745,7 @@ contains !> Function that uses the interpolation coefficients and the current state !! at the cell centers in order to estimate the state at the image point pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP) - !$acc routine seq + $:ROUTINE() type(scalar_field), & dimension(sys_size), & intent(IN) :: q_prim_vf !< Primitive Variables diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index edaf2ee367..f9ddd1f2d1 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -684,7 +684,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff_nonpoly #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -757,7 +757,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(inout) :: pres, rho, c @@ -1038,7 +1038,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(nmom), intent(in) :: momin real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY @@ -1105,7 +1105,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_hyqmom #else - !$acc routine seq + $:ROUTINE() #endif real(wp), dimension(2), intent(inout) :: frho, fup real(wp), dimension(3), intent(in) :: fmom @@ -1124,7 +1124,7 @@ contains end subroutine s_hyqmom pure function f_quad(abscX, abscY, wght_in, q, r, s) - !$acc routine seq + $:ROUTINE() real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in real(wp), intent(in) :: q, r, s @@ -1140,7 +1140,7 @@ contains end function f_quad pure function f_quad2D(abscX, abscY, wght_in, pow) - !$acc routine seq + $:ROUTINE() real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in real(wp), dimension(3), intent(in) :: pow diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d82ce6b154..c496ce7632 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -4265,7 +4265,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction). pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) - !$acc routine seq + $:ROUTINE() implicit none @@ -4299,7 +4299,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction). pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) - !$acc routine seq + $:ROUTINE() implicit none diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.fpp similarity index 99% rename from src/simulation/m_sim_helpers.f90 rename to src/simulation/m_sim_helpers.fpp index 64960b1353..85a00f7b13 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.fpp @@ -1,3 +1,4 @@ +#:include 'directive_macros.fpp' module m_sim_helpers use m_derived_types !< Definitions of the derived types @@ -33,7 +34,7 @@ pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, a #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_enthalpy #else - !$acc routine seq + $:ROUTINE() #endif type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf @@ -98,7 +99,7 @@ end subroutine s_compute_enthalpy !! @param vcfl_sf (optional) cell centered viscous cfl number !! @param Rc_sf (optional) cell centered Rc pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - !$acc routine seq + $:ROUTINE() real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf @@ -195,7 +196,7 @@ end subroutine s_compute_stability_from_dt !! @param k y coordinate !! @param l z coordinate pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) - !$acc routine seq + $:ROUTINE() real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt From 779ea27f0eb8f7b02d2b665004fcf9e0b30e4cb6 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 10 Jun 2025 23:21:34 -0400 Subject: [PATCH 17/75] Replaced non-parallel loops --- src/common/m_chemistry.fpp | 10 +- src/common/m_phase_change.fpp | 12 +- src/common/m_variables_conversion.fpp | 70 +++--- src/simulation/include/inline_riemann.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 8 +- src/simulation/m_bubbles_EE.fpp | 20 +- src/simulation/m_bubbles_EL.fpp | 14 +- src/simulation/m_cbc.fpp | 64 +++--- src/simulation/m_hyperelastic.fpp | 16 +- src/simulation/m_hypoelastic.fpp | 6 +- src/simulation/m_ibm.fpp | 24 +- src/simulation/m_mhd.fpp | 6 +- src/simulation/m_qbmm.fpp | 24 +- src/simulation/m_rhs.fpp | 54 ++--- src/simulation/m_riemann_solvers.fpp | 262 +++++++++++----------- src/simulation/m_sim_helpers.fpp | 6 +- src/simulation/m_viscous.fpp | 104 ++++----- src/simulation/m_weno.fpp | 4 +- 18 files changed, 354 insertions(+), 354 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index e7c42b36f6..14b3c3d1a6 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -36,7 +36,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - !$acc loop seq + $:LOOP() do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = & q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) @@ -47,7 +47,7 @@ contains ! cons. contxb = \rho (1-fluid model) ! cons. momxb + i = \rho u_i energy = q_cons_vf(E_idx)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) - !$acc loop seq + $:LOOP() do eqn = momxb, momxe energy = energy - & 0.5_wp*(q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z))**2._wp @@ -73,7 +73,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_vf(i)%sf(x, y, z) end do @@ -105,7 +105,7 @@ contains do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - !$acc loop seq + $:LOOP() do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) end do @@ -115,7 +115,7 @@ contains call get_net_production_rates(rho, T, Ys, omega) - !$acc loop seq + $:LOOP() do eqn = chemxb, chemxe omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index fd03e5f09a..89d4e412fe 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -107,7 +107,7 @@ contains do l = 0, p rho = 0.0_wp; TvF = 0.0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! Mixture density @@ -133,7 +133,7 @@ contains ! kinetic energy as an auxiliary variable to the calculation of the total internal energy dynE = 0.0_wp - !$acc loop seq + $:LOOP() do i = momxb, momxe dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho @@ -255,7 +255,7 @@ contains ! calculating volume fractions, internal energies, and total entropy rhos = 0.0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! volume fractions @@ -308,7 +308,7 @@ contains ! auxiliary variables for the pT-equilibrium solver mCP = 0.0_wp; mQ = 0.0_wp; p_infpT = ps_inf; ! Performing tests before initializing the pT-equilibrium - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system @@ -357,7 +357,7 @@ contains ! updating functions used in the Newton's solver gpp = 0.0_wp; gp = 0.0_wp; hp = 0.0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids gp = gp + (gs_min(i) - 1.0_wp)*q_cons_vf(i + contxb - 1)%sf(j, k, l)*cvs(i) & @@ -451,7 +451,7 @@ contains mCP = 0.0_wp; mCPD = 0.0_wp; mCVGP = 0.0_wp; mCVGP2 = 0.0_wp; mQ = 0.0_wp; mQD = 0.0_wp ! Those must be updated through the iterations, as they either depend on ! the partial masses for all fluids, or on the equilibrium pressure - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 7523afd13d..4634ee0e34 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -759,7 +759,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !$acc loop seq + $:LOOP() do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -792,7 +792,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !$acc loop seq + $:LOOP() do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -892,7 +892,7 @@ contains do j = ibounds(1)%beg, ibounds(1)%end dyn_pres_K = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -936,13 +936,13 @@ contains B2 = B(1)**2 + B(2)**2 + B(3)**2 m2 = 0._wp - !$acc loop seq + $:LOOP() do i = momxb, momxe m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 end do S = 0._wp - !$acc loop seq + $:LOOP() do i = 1, 3 S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) end do @@ -950,14 +950,14 @@ contains E = qK_cons_vf(E_idx)%sf(j, k, l) D = 0._wp - !$acc loop seq + $:LOOP() do i = 1, contxe D = D + qK_cons_vf(i)%sf(j, k, l) end do ! Newton-Raphson W = E + D - !$acc loop seq + $:LOOP() do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS @@ -983,13 +983,13 @@ contains qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Recover the other primitive variables - !$acc loop seq + $:LOOP() do i = 1, 3 qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) end do qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - !$acc loop seq + $:LOOP() do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -999,22 +999,22 @@ contains if (chemistry) then rho_K = 0._wp - !$acc loop seq + $:LOOP() do i = chemxb, chemxe rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) end do - !$acc loop seq + $:LOOP() do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = rho_K end do - !$acc loop seq + $:LOOP() do i = chemxb, chemxe qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) end do else - !$acc loop seq + $:LOOP() do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1024,7 +1024,7 @@ contains rho_K = max(rho_K, sgm_eps) #endif - !$acc loop seq + $:LOOP() do i = momxb, momxe if (model_eqns /= 4) then qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & @@ -1038,7 +1038,7 @@ contains end do if (chemistry) then - !$acc loop seq + $:LOOP() do i = 1, num_species rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do @@ -1068,7 +1068,7 @@ contains end if if (bubbles_euler) then - !$acc loop seq + $:LOOP() do i = 1, nb nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) end do @@ -1080,7 +1080,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) !Convert cons to prim - !$acc loop seq + $:LOOP() do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1097,7 +1097,7 @@ contains call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - !$acc loop seq + $:LOOP() do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1105,21 +1105,21 @@ contains end if if (mhd) then - !$acc loop seq + $:LOOP() do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do end if if (elasticity) then - !$acc loop seq + $:LOOP() do i = strxb, strxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = strxb, strxe ! subtracting elastic contribution for pressure calculation if (G_K > verysmall) then @@ -1136,13 +1136,13 @@ contains end if if (hyperelasticity) then - !$acc loop seq + $:LOOP() do i = xibeg, xiend qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if - !$acc loop seq + $:LOOP() do i = advxb, advxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1487,22 +1487,22 @@ contains do k = is2b, is2e do j = is1b, is1e - !$acc loop seq + $:LOOP() do i = 1, contxe alpha_rho_K(i) = qK_prim_vf(j, k, l, i) end do - !$acc loop seq + $:LOOP() do i = advxb, advxe alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) end do - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_K(i) = qK_prim_vf(j, k, l, contxe + i) end do vel_K_sum = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_K_sum = vel_K_sum + vel_K(i)**2._wp end do @@ -1523,7 +1523,7 @@ contains ! Computing the energy from the pressure if (chemistry) then - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do @@ -1540,12 +1540,12 @@ contains end if ! mass flux, this should be \alpha_i \rho_i u_i - !$acc loop seq + $:LOOP() do i = 1, contxe FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) end do - !$acc loop seq + $:LOOP() do i = 1, num_vels FK_vf(j, k, l, contxe + dir_idx(i)) = & rho_K*vel_K(dir_idx(1)) & @@ -1558,14 +1558,14 @@ contains ! Species advection Flux, \rho*u*Y if (chemistry) then - !$acc loop seq + $:LOOP() do i = 1, num_species FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do end if if (riemann_solver == 1 .or. riemann_solver == 4) then - !$acc loop seq + $:LOOP() do i = advxb, advxe FK_vf(j, k, l, i) = 0._wp FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) @@ -1573,12 +1573,12 @@ contains else ! Could be bubbles_euler! - !$acc loop seq + $:LOOP() do i = advxb, advxe FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) end do - !$acc loop seq + $:LOOP() do i = advxb, advxe FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) end do @@ -1652,7 +1652,7 @@ contains c = (1._wp/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2))) elseif (model_eqns == 3) then c = 0._wp - !$acc loop seq + $:LOOP() do q = 1, num_fluids c = c + adv(q)*(1._wp/gammas(q) + 1._wp)* & (pres + pi_infs(q)/(gammas(q) + 1._wp)) diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 9ad956a2c8..81e311141a 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -1,7 +1,7 @@ #:def arithmetic_avg() rho_avg = 5e-1_wp*(rho_L + rho_R) vel_avg_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -17,7 +17,7 @@ vel_avg_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_avg_rms = vel_avg_rms + (sqrt(rho_L)*vel_L(i) + sqrt(rho_R)*vel_R(i))**2._wp/ & (sqrt(rho_L) + sqrt(rho_R))**2._wp diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 7c15bb5cf1..73d1f14fb8 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -232,7 +232,7 @@ contains if (bubbles_euler) then if (num_fluids > 2) then - !$acc loop seq + $:LOOP() do q = 1, num_fluids - 1 myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -246,7 +246,7 @@ contains end if if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then - !$acc loop seq + $:LOOP() do q = 1, num_fluids myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -317,11 +317,11 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do q = contxb, contxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) end do - !$acc loop seq + $:LOOP() do q = momxb, momxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) end do diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index dfe0f3da49..84ac15ecb3 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -82,7 +82,7 @@ contains do k = 0, n do j = 0, m nR3bar = 0._wp - !$acc loop seq + $:LOOP() do i = 1, nb nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp end do @@ -180,7 +180,7 @@ contains do j = 0, m bub_adv_src(j, k, l) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, nb bub_r_src(j, k, l, q) = 0._wp bub_v_src(j, k, l, q) = 0._wp @@ -202,7 +202,7 @@ contains if (adv_n) then nbub = q_prim_vf(n_idx)%sf(j, k, l) else - !$acc loop seq + $:LOOP() do q = 1, nb Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) @@ -210,7 +210,7 @@ contains R3 = 0._wp - !$acc loop seq + $:LOOP() do q = 1, nb R3 = R3 + weight(q)*Rtmp(q)**3._wp end do @@ -221,7 +221,7 @@ contains if (.not. adap_dt) then R2Vav = 0._wp - !$acc loop seq + $:LOOP() do q = 1, nb R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) end do @@ -229,10 +229,10 @@ contains bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav end if - !$acc loop seq + $:LOOP() do q = 1, nb - !$acc loop seq + $:LOOP() do ii = 1, num_fluids myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) @@ -243,14 +243,14 @@ contains B_tait = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do ii = 1, num_fluids myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) B_tait = B_tait + myalpha(ii)*pi_infs(ii) end do else if (num_fluids > 2) then - !$acc loop seq + $:LOOP() do ii = 1, num_fluids - 1 myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) @@ -333,7 +333,7 @@ contains rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - !$acc loop seq + $:LOOP() do k = 1, nb rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index f83b64a3fd..4fa66a2757 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -570,7 +570,7 @@ contains call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) ! Obtain liquid density and computing speed of sound from pinf - !$acc loop seq + $:LOOP() do i = 1, num_fluids myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) @@ -750,7 +750,7 @@ contains real(wp), dimension(num_dims) :: vel integer :: i - !$acc loop seq + $:LOOP() do i = 1, num_dims vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) end do @@ -831,7 +831,7 @@ contains !< Find current bubble cell cell(:) = int(scoord(:)) - !$acc loop seq + $:LOOP() do i = 1, num_dims if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 end do @@ -922,11 +922,11 @@ contains charpres2 = 0._wp vol = 0._wp - !$acc loop seq + $:LOOP() do i = 1, smearGrid - !$acc loop seq + $:LOOP() do j = 1, smearGrid - !$acc loop seq + $:LOOP() do k = 1, smearGridz cellaux(1) = cell(1) + i - (mapCells + 1) cellaux(2) = cell(2) + j - (mapCells + 1) @@ -1648,7 +1648,7 @@ contains integer :: i - !$acc loop seq + $:LOOP() do i = bub_id, nBubs - 1 lag_id(i, 1) = lag_id(i + 1, 1) bub_R0(i) = bub_R0(i + 1) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 73458d56f8..dd26870eb8 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -781,25 +781,25 @@ contains do k = is2%beg, is2%end ! Transferring the Primitive Variables - !$acc loop seq + $:LOOP() do i = 1, contxe alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - !$acc loop seq + $:LOOP() do i = 1, num_dims vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) end do vel_K_sum = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_K_sum = vel_K_sum + vel(i)**2._wp end do pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) - !$acc loop seq + $:LOOP() do i = 1, advxe - E_idx adv(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) end do @@ -810,13 +810,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc, 0, k, r) end if - !$acc loop seq + $:LOOP() do i = 1, contxe mf(i) = alpha_rho(i)/rho end do if (chemistry) then - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do @@ -849,39 +849,39 @@ contains ! First-Order Spatial Derivatives of Primitive Variables - !$acc loop seq + $:LOOP() do i = 1, contxe dalpha_rho_ds(i) = 0._wp end do - !$acc loop seq + $:LOOP() do i = 1, num_dims dvel_ds(i) = 0._wp end do dpres_ds = 0._wp - !$acc loop seq + $:LOOP() do i = 1, advxe - E_idx dadv_ds(i) = 0._wp end do if (chemistry) then - !$acc loop seq + $:LOOP() do i = 1, num_species dYs_ds(i) = 0._wp end do end if - !$acc loop seq + $:LOOP() do j = 0, buff_size - !$acc loop seq + $:LOOP() do i = 1, contxe dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dalpha_rho_ds(i) end do - !$acc loop seq + $:LOOP() do i = 1, num_dims dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -891,7 +891,7 @@ contains dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dpres_ds - !$acc loop seq + $:LOOP() do i = 1, advxe - E_idx dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -899,7 +899,7 @@ contains end do if (chemistry) then - !$acc loop seq + $:LOOP() do i = 1, num_species dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -926,7 +926,7 @@ contains call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! Add GRCBC for Subsonic Inflow if (bc_${XYZ}$%grcbc_in) then - !$acc loop seq + $:LOOP() do i = 2, momxb L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -936,7 +936,7 @@ contains L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if end if - !$acc loop seq + $:LOOP() do i = E_idx, advxe - 1 L(i) = c*Ma*(adv(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -976,13 +976,13 @@ contains dpres_dt = -5e-1_wp*(L(advxe) + L(1)) end if - !$acc loop seq + $:LOOP() do i = 1, contxe dalpha_rho_dt(i) = & -(L(i + 1) - mf(i)*dpres_dt)/(c*c) end do - !$acc loop seq + $:LOOP() do i = 1, num_dims dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & (L(1) - L(advxe))/(2._wp*rho*c) + & @@ -991,13 +991,13 @@ contains end do vel_dv_dt_sum = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) end do if (chemistry) then - !$acc loop seq + $:LOOP() do i = 1, num_species dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do @@ -1005,12 +1005,12 @@ contains ! The treatment of void fraction source is unclear if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - !$acc loop seq + $:LOOP() do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) !+ adv(i) * vel(dir_idx(1))/y_cc(n) end do else - !$acc loop seq + $:LOOP() do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) end do @@ -1023,7 +1023,7 @@ contains dgamma_dt = dadv_dt(1) dpi_inf_dt = dadv_dt(2) else - !$acc loop seq + $:LOOP() do i = 1, num_fluids drho_dt = drho_dt + dalpha_rho_dt(i) dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) @@ -1033,13 +1033,13 @@ contains end if ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*dalpha_rho_dt(i) end do - !$acc loop seq + $:LOOP() do i = momxb, momxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*(vel(i - contxe)*drho_dt & @@ -1050,14 +1050,14 @@ contains ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 call get_species_enthalpies_rt(T, h_k) sum_Enthalpies = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_species h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) end do flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - !$acc loop seq + $:LOOP() do i = 1, num_species flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) @@ -1073,12 +1073,12 @@ contains end if if (riemann_solver == 1) then - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & @@ -1091,13 +1091,13 @@ contains else - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & ds(0)*dadv_dt(i - E_idx) end do - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) end do diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 487e0ceb64..43581328b7 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -56,7 +56,7 @@ contains @:ACC_SETUP_VFs(btensor) @:ALLOCATE(Gs(1:num_fluids)) - !$acc loop seq + $:LOOP() do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do @@ -112,7 +112,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -125,7 +125,7 @@ contains !if ( G <= verysmall ) G_K = 0_wp if (G > verysmall) then - !$acc loop seq + $:LOOP() do i = 1, tensor_size tensora(i) = 0_wp end do @@ -134,7 +134,7 @@ contains ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number ! derivatives in the x-direction tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) @@ -168,7 +168,7 @@ contains if (tensorb(tensor_size) > verysmall) then ! STEP 2c: computing the inverse of grad_xi tensor = F ! tensorb is the adjoint, tensora becomes F - !$acc loop seq + $:LOOP() do i = 1, tensor_size - 1 tensora(i) = tensorb(i)/tensorb(tensor_size) end do @@ -199,7 +199,7 @@ contains q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & G*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma ! STEP 5c: updating the Cauchy stress conservative scalar field - !$acc loop seq + $:LOOP() do i = 1, b_size - 1 q_cons_vf(strxb + i - 1)%sf(j, k, l) = & rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) @@ -240,7 +240,7 @@ contains #:endfor ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - !$acc loop seq + $:LOOP() do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) @@ -281,7 +281,7 @@ contains ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - !$acc loop seq + $:LOOP() do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 68032cb5b8..4c1a101e49 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -119,7 +119,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number du_dx(k, l, q) = du_dx(k, l, q) & + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) @@ -145,7 +145,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number du_dy(k, l, q) = du_dy(k, l, q) & + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) @@ -177,7 +177,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number du_dz(k, l, q) = du_dz(k, l, q) & + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 31fd99fe99..d118a2fa55 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -190,7 +190,7 @@ contains dyn_pres = 0._wp ! Set q_prim_vf params at GP so that mixture vars calculated properly - !$acc loop seq + $:LOOP() do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -226,7 +226,7 @@ contains end if ! Set momentum - !$acc loop seq + $:LOOP() do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -234,7 +234,7 @@ contains end do ! Set continuity and adv vars - !$acc loop seq + $:LOOP() do q = 1, num_fluids q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -288,7 +288,7 @@ contains end if if (model_eqns == 3) then - !$acc loop seq + $:LOOP() do q = intxb, intxe q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + pi_infs(q - intxb + 1)) @@ -314,7 +314,7 @@ contains physical_loc = [x_cc(j), y_cc(k), 0._wp] end if - !$acc loop seq + $:LOOP() do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -329,7 +329,7 @@ contains dyn_pres = 0._wp - !$acc loop seq + $:LOOP() do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -798,11 +798,11 @@ contains end if end if - !$acc loop seq + $:LOOP() do i = i1, i2 - !$acc loop seq + $:LOOP() do j = j1, j2 - !$acc loop seq + $:LOOP() do k = k1, k2 coeff = gp%interp_coeffs(i - i1 + 1, j - j1 + 1, k - k1 + 1) @@ -810,13 +810,13 @@ contains pres_IP = pres_IP + coeff* & q_prim_vf(E_idx)%sf(i, j, k) - !$acc loop seq + $:LOOP() do q = momxb, momxe vel_IP(q + 1 - momxb) = vel_IP(q + 1 - momxb) + coeff* & q_prim_vf(q)%sf(i, j, k) end do - !$acc loop seq + $:LOOP() do l = contxb, contxe alpha_rho_IP(l) = alpha_rho_IP(l) + coeff* & q_prim_vf(l)%sf(i, j, k) @@ -829,7 +829,7 @@ contains end if if (bubbles_euler .and. .not. qbmm) then - !$acc loop seq + $:LOOP() do l = 1, nb if (polytropic) then r_IP(l) = r_IP(l) + coeff*q_prim_vf(bubxb + (l - 1)*2)%sf(i, j, k) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 8e6ea6c820..a096a4fe04 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -83,16 +83,16 @@ contains do k = 0, m divB = 0._wp - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) end do - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) end do if (p > 0) then - !$acc loop seq + $:LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index f9ddd1f2d1..075334a2d0 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -488,7 +488,7 @@ contains rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) j = bubxb - !$acc loop seq + $:LOOP() do k = 1, nb rhs_vf(j)%sf(i, q, l) = & rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) @@ -876,10 +876,10 @@ contains nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) - !$acc loop seq + $:LOOP() do q = 1, nb !Initialize moment set for each R0 bin - !$acc loop seq + $:LOOP() do r = 2, nmom moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) end do @@ -890,13 +890,13 @@ contains if (polytropic) then !Account for bubble pressure pb0 at each R0 bin - !$acc loop seq + $:LOOP() do j = 1, nnode wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do else !Account for bubble pressure, mass transfer rate and heat transfer rate in wght_pb, wght_mv and wght_ht using Preston model - !$acc loop seq + $:LOOP() do j = 1, nnode chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) @@ -919,13 +919,13 @@ contains !Compute change in moments due to bubble dynamics r = 1 - !$acc loop seq + $:LOOP() do i2 = 0, 2 - !$acc loop seq + $:LOOP() do i1 = 0, 2 if ((i1 + i2) <= 2) then momsum = 0._wp - !$acc loop seq + $:LOOP() do j = 1, nterms ! Account for term with pb in Rayleigh Plesset equation if (bubble_model == 3 .and. j == 3) then @@ -960,7 +960,7 @@ contains ! Compute change in pb and mv for non-polytroic model if (.not. polytropic) then - !$acc loop seq + $:LOOP() do j = 1, nnode ! Compute Rdot (drdt) at quadrature node in the ODE for pb (note this is not the same as bubble variable Rdot) drdt = msum(2) @@ -1010,11 +1010,11 @@ contains end if else - !$acc loop seq + $:LOOP() do q = 1, nb - !$acc loop seq + $:LOOP() do i1 = 0, 2 - !$acc loop seq + $:LOOP() do i2 = 0, 2 moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp end do diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 730ccba98b..9f96010e56 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -652,11 +652,11 @@ contains do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end alf_sum%sf(j, k, l) = 0._wp - !$acc loop seq + $:LOOP() do i = advxb, advxe - 1 alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) end do - !$acc loop seq + $:LOOP() do i = advxb, advxe - 1 q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & /alf_sum%sf(j, k, l) @@ -1462,7 +1462,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & @@ -1511,7 +1511,7 @@ contains $:PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & @@ -1527,7 +1527,7 @@ contains do l = 0, p do k = 1, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1543,7 +1543,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1564,7 +1564,7 @@ contains do l = 0, p do k = 1, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1579,7 +1579,7 @@ contains $:PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & @@ -1594,7 +1594,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1629,7 +1629,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - !$acc loop seq + $:LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & @@ -1706,7 +1706,7 @@ contains if (mpp_lim) then sum_alpha = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. & (q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then @@ -1720,7 +1720,7 @@ contains sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha end do @@ -1731,7 +1731,7 @@ contains ! Is the pressure relaxation procedure necessary? relax = 1 - !$acc loop seq + $:LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) relax = 0 end do @@ -1740,7 +1740,7 @@ contains ! Initial state pres_relax = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then pres_K_init(i) = & @@ -1760,12 +1760,12 @@ contains f_pres = 1e-9_wp df_pres = 1e9_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_K_s(i) = 0._wp end do - !$acc loop seq + $:LOOP() do iter = 0, 49 if (abs(f_pres) > 1e-10_wp) then @@ -1781,7 +1781,7 @@ contains f_pres = -1._wp df_pres = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ & @@ -1801,7 +1801,7 @@ contains end do ! Cell update of the volume fraction - !$acc loop seq + $:LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) & q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l) & @@ -1817,7 +1817,7 @@ contains ! However, the internal-energy equations should be reset with the corresponding mixture ! pressure from the correction. This step is carried out below. - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho(i) = q_cons_vf(i)%sf(j, k, l) alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l) @@ -1829,14 +1829,14 @@ contains pi_inf = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) pi_inf = pi_inf + alpha(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -1855,7 +1855,7 @@ contains sum_alpha = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho(i) = max(0._wp, alpha_rho(i)) alpha(i) = min(max(0._wp, alpha(i)), 1._wp) @@ -1866,7 +1866,7 @@ contains end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -1874,12 +1874,12 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re(i) = dflt_real if (Re_size(i) > 0) Re(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re(i) = alpha(Re_idx(i, q))/Res(i, q) & + Re(i) @@ -1893,7 +1893,7 @@ contains dyn_pres = 0._wp - !$acc loop seq + $:LOOP() do i = momxb, momxe dyn_pres = dyn_pres + 5e-1_wp*q_cons_vf(i)%sf(j, k, l)* & q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps) @@ -1901,7 +1901,7 @@ contains pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma - !$acc loop seq + $:LOOP() do i = 1, num_fluids q_cons_vf(i + intxb - 1)%sf(j, k, l) = & q_cons_vf(i + advxb - 1)%sf(j, k, l)* & diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index c496ce7632..a6d1c80a77 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -374,13 +374,13 @@ contains do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end - !$acc loop seq + $:LOOP() do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -388,13 +388,13 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -438,7 +438,7 @@ contains pres_mag%R = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) @@ -447,7 +447,7 @@ contains alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) @@ -457,7 +457,7 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -471,13 +471,13 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -487,13 +487,13 @@ contains end do - !$acc loop seq + $:LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_R(i) = alpha_R(Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -504,7 +504,7 @@ contains end if if (chemistry) then - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -601,7 +601,7 @@ contains if (hypoelasticity) then G_L = 0._wp; G_R = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) @@ -634,7 +634,7 @@ contains ! G_L = 0._wp ! G_R = 0._wp ! - ! !$acc loop seq + ! $:LOOP() ! do i = 1, num_fluids ! G_L = G_L + alpha_L(i)*Gs(i) ! G_R = G_R + alpha_R(i)*Gs(i) @@ -643,17 +643,17 @@ contains ! if ((G_L > 1e-3_wp) .and. (G_R > 1e-3_wp)) then ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! !$acc loop seq + ! $:LOOP() ! do i = 1, b_size-1 ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) ! end do - ! !$acc loop seq + ! $:LOOP() ! do i = 1, b_size-1 ! tau_e_L(i) = 0_wp ! tau_e_R(i) = 0_wp ! end do - ! !$acc loop seq + ! $:LOOP() ! do i = 1, num_dims ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) @@ -681,7 +681,7 @@ contains end if if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -760,7 +760,7 @@ contains ! Mass if (.not. relativity) then - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*alpha_rho_R(i)*vel_R(norm_dir) & @@ -770,7 +770,7 @@ contains /(s_M - s_P) end do elseif (relativity) then - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & @@ -851,7 +851,7 @@ contains + s_M*s_P*(cm%L(3) - cm%R(3))) & /(s_M - s_P) elseif (bubbles_euler) then - !$acc loop seq + $:LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -866,7 +866,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do else if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -882,7 +882,7 @@ contains /(s_M - s_P) end do else - !$acc loop seq + $:LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -978,7 +978,7 @@ contains end if ! Advection - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & (qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1003,7 +1003,7 @@ contains !end if ! Div(U)? - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & (xi_M*(rho_L*vel_L(dir_idx(i))* & @@ -1024,7 +1024,7 @@ contains end if if (chemistry) then - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -1074,7 +1074,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - !$acc loop seq + $:LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1083,7 +1083,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) ! Geometrical source of the void fraction(s) is zero - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1096,7 +1096,7 @@ contains (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - !$acc loop seq + $:LOOP() do i = strxb, strxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1312,7 +1312,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -1337,32 +1337,32 @@ contains alpha_R_sum = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -1379,13 +1379,13 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -1395,13 +1395,13 @@ contains end do - !$acc loop seq + $:LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -1417,18 +1417,18 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0_wp; G_R = 0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -1445,13 +1445,13 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - !$acc loop seq + $:LOOP() do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0_wp; G_R = 0_wp; - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -1462,7 +1462,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - !$acc loop seq + $:LOOP() do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -1486,7 +1486,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -1581,7 +1581,7 @@ contains ! COMPUTING FLUXES ! MASS FLUX. - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & @@ -1590,7 +1590,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & @@ -1606,7 +1606,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0_wp; - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -1624,7 +1624,7 @@ contains end if ! VOLUME FRACTION FLUX. - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & @@ -1632,7 +1632,7 @@ contains end do ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -1642,7 +1642,7 @@ contains ! INTERNAL ENERGIES ADVECTION FLUX. ! K-th pressure and velocity in preparation for the internal energy flux - !$acc loop seq + $:LOOP() do i = 1, num_fluids p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1_wp + gammas(i)))* & xi_L**(1_wp/gammas(i) + 1_wp) - pi_infs(i)/(1_wp + gammas(i)) - pres_L) + pres_L) + & @@ -1661,7 +1661,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -1671,7 +1671,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - !$acc loop seq + $:LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -1692,11 +1692,11 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - !$acc loop seq + $:LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - !$acc loop seq + $:LOOP() do i = intxb, intxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1704,7 +1704,7 @@ contains flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star ! Geometrical source of the void fraction(s) is zero - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0_wp end do @@ -1712,7 +1712,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - !$acc loop seq + $:LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0_wp end do @@ -1737,26 +1737,26 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - !$acc loop seq + $:LOOP() do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -1769,7 +1769,7 @@ contains gamma_L = 0._wp pi_inf_L = 0._wp qv_L = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -1781,7 +1781,7 @@ contains gamma_R = 0._wp pi_inf_R = 0._wp qv_R = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_R = rho_R + alpha_rho_R(i) gamma_R = gamma_R + alpha_R(i)*gammas(i) @@ -1856,7 +1856,7 @@ contains xi_M = (5e-1_wp + sign(5e-1_wp, s_S)) xi_P = (5e-1_wp - sign(5e-1_wp, s_S)) - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*alpha_rho_L(i) & @@ -1867,7 +1867,7 @@ contains ! Momentum flux. ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - !$acc loop seq + $:LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -1886,7 +1886,7 @@ contains if (bubbles_euler) then ! Put p_tilde in - !$acc loop seq + $:LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & @@ -1897,7 +1897,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - !$acc loop seq + $:LOOP() do i = alf_idx, alf_idx !only advect the void fraction flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1907,7 +1907,7 @@ contains end do ! Source for volume fraction advection equation - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp @@ -1918,7 +1918,7 @@ contains ! Add advection flux for bubble variables if (bubbles_euler) then - !$acc loop seq + $:LOOP() do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1933,7 +1933,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - !$acc loop seq + $:LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1950,7 +1950,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1958,7 +1958,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - !$acc loop seq + $:LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1992,7 +1992,7 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -2000,7 +2000,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2018,7 +2018,7 @@ contains ! Retain this in the refactor if (mpp_lim .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2026,7 +2026,7 @@ contains qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2046,7 +2046,7 @@ contains qv_R = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2054,7 +2054,7 @@ contains qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2070,13 +2070,13 @@ contains if (viscous) then if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - !$acc loop seq + $:LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_L(i) @@ -2086,13 +2086,13 @@ contains end do - !$acc loop seq + $:LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_R(i) @@ -2111,7 +2111,7 @@ contains H_R = (E_R + pres_R)/rho_R if (avg_state == 2) then - !$acc loop seq + $:LOOP() do i = 1, nb R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) @@ -2131,7 +2131,7 @@ contains else nbub_L_denom = 0._wp nbub_R_denom = 0._wp - !$acc loop seq + $:LOOP() do i = 1, nb nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) @@ -2145,7 +2145,7 @@ contains nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - !$acc loop seq + $:LOOP() do i = 1, nb if (.not. qbmm) then if (polytropic) then @@ -2178,7 +2178,7 @@ contains R3V2Lbar = 0._wp R3V2Rbar = 0._wp - !$acc loop seq + $:LOOP() do i = 1, nb PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) @@ -2213,7 +2213,7 @@ contains gamma_avg = 5e-1_wp*(gamma_L + gamma_R) vel_avg_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -2232,7 +2232,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2296,7 +2296,7 @@ contains pcorr = 0._wp end if - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2315,7 +2315,7 @@ contains ! Include p_tilde - !$acc loop seq + $:LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -2347,7 +2347,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*s_S ! Volume fraction flux - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2357,7 +2357,7 @@ contains end do ! Source for volume fraction advection equation - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & xi_M*(vel_L(dir_idx(i)) + & @@ -2373,7 +2373,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) ! Add advection flux for bubble variables - !$acc loop seq + $:LOOP() do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2402,7 +2402,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - !$acc loop seq + $:LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2419,7 +2419,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2427,7 +2427,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - !$acc loop seq + $:LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2468,14 +2468,14 @@ contains !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2502,32 +2502,32 @@ contains ! Change this by splitting it into the cases ! present in the bubbles_euler if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - !$acc loop seq + $:LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2541,13 +2541,13 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -2557,13 +2557,13 @@ contains end do - !$acc loop seq + $:LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -2575,7 +2575,7 @@ contains if (chemistry) then c_sum_Yi_Phi = 0.0_wp - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2634,19 +2634,19 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0_wp G_R = 0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -2663,14 +2663,14 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - !$acc loop seq + $:LOOP() do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0_wp G_R = 0_wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -2681,7 +2681,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - !$acc loop seq + $:LOOP() do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -2705,7 +2705,7 @@ contains vel_avg_rms, c_sum_Yi_Phi, c_avg) if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2781,7 +2781,7 @@ contains ! COMPUTING THE HLLC FLUXES ! MASS FLUX. - !$acc loop seq + $:LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2792,7 +2792,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & @@ -2827,7 +2827,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0_wp - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -2846,7 +2846,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - !$acc loop seq + $:LOOP() do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -2855,7 +2855,7 @@ contains end if ! VOLUME FRACTION FLUX. - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2865,7 +2865,7 @@ contains end do ! VOLUME FRACTION SOURCE FLUX. - !$acc loop seq + $:LOOP() do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -2888,7 +2888,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - !$acc loop seq + $:LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -2901,7 +2901,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) if (chemistry) then - !$acc loop seq + $:LOOP() do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2916,7 +2916,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - !$acc loop seq + $:LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2933,7 +2933,7 @@ contains (1._wp - dir_flg(idx1))* & vel_R(idx1)) - vel_R(idx1)))) ! Geometrical source of the void fraction(s) is zero - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2941,7 +2941,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - !$acc loop seq + $:LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -3134,7 +3134,7 @@ contains ! Sum properties of all fluid components rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho%L = rho%L + alpha_rho_L(i) gamma%L = gamma%L + alpha_L(i)*gammas(i) @@ -3304,7 +3304,7 @@ contains ! Energy flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) ! Partial fraction - !$acc loop seq + $:LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do @@ -4017,7 +4017,7 @@ contains ! Average velocities and their derivatives at the interface ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - !$acc loop seq + $:LOOP() do i_vel = 1, num_dims avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) @@ -4095,7 +4095,7 @@ contains end if end select - !$acc loop seq + $:LOOP() do i_vel = 1, num_dims flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 85a00f7b13..49f1a5e7d7 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -49,7 +49,7 @@ contains integer :: i - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho(i) = q_prim_vf(i)%sf(j, k, l) alpha(i) = q_prim_vf(E_idx + i)%sf(j, k, l) @@ -64,13 +64,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re, j, k, l) end if - !$acc loop seq + $:LOOP() do i = 1, num_vels vel(i) = q_prim_vf(contxe + i)%sf(j, k, l) end do vel_sum = 0._wp - !$acc loop seq + $:LOOP() do i = 1, num_vels vel_sum = vel_sum + vel(i)**2._wp end do diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index cd48b01e50..84eb8a4329 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -82,7 +82,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = momxb, E_idx tau_Re_vf(i)%sf(j, k, l) = 0._wp end do @@ -96,7 +96,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -112,14 +112,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -138,7 +138,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -149,7 +149,7 @@ contains end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -157,12 +157,12 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -182,7 +182,7 @@ contains - 2._wp*grad_x_vf(1)%sf(j, k, l) & - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & (3._wp*Re_visc(1)) - !$acc loop seq + $:LOOP() do i = 1, 2 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -204,7 +204,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -220,14 +220,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -246,7 +246,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -257,7 +257,7 @@ contains end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -265,12 +265,12 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -309,7 +309,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -325,14 +325,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -351,7 +351,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -362,7 +362,7 @@ contains end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -370,12 +370,12 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -395,7 +395,7 @@ contains y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & Re_visc(1) - !$acc loop seq + $:LOOP() do i = 2, 3 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -418,7 +418,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -434,14 +434,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -460,7 +460,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - !$acc loop seq + $:LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -471,7 +471,7 @@ contains end if - !$acc loop seq + $:LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -479,12 +479,12 @@ contains end do if (viscous) then - !$acc loop seq + $:LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - !$acc loop seq + $:LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -598,7 +598,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j, k, l) - & @@ -613,7 +613,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j + 1, k, l) - & @@ -630,7 +630,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j, l) - & @@ -645,7 +645,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j + 1, l) - & @@ -660,7 +660,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & @@ -679,7 +679,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & @@ -699,7 +699,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & @@ -719,7 +719,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & @@ -741,7 +741,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -757,7 +757,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -773,7 +773,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -794,7 +794,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -815,7 +815,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -836,7 +836,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -857,7 +857,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -878,7 +878,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -898,7 +898,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & @@ -918,7 +918,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & @@ -1215,7 +1215,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(j)) & @@ -1243,7 +1243,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(k)) & @@ -1271,7 +1271,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - !$acc loop seq + $:LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(l)) & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 1915254cc3..830c852c83 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -799,7 +799,7 @@ contains do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - !$acc loop seq + $:LOOP() do i = 1, v_size ! reconstruct from left side @@ -919,7 +919,7 @@ contains do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - !$acc loop seq + $:LOOP() do i = 1, v_size if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity From 8061a0d51c40f8779b6030e7aaf485dc7ac1d262 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 12:51:07 -0400 Subject: [PATCH 18/75] Added declare macros --- src/common/m_boundary_common.fpp | 2 +- src/common/m_mpi_common.fpp | 7 +- src/common/m_phase_change.fpp | 8 +- src/common/m_variables_conversion.fpp | 6 +- src/simulation/m_acoustic_src.fpp | 26 ++++--- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles.fpp | 2 +- src/simulation/m_bubbles_EE.fpp | 6 +- src/simulation/m_bubbles_EL.fpp | 15 ++-- src/simulation/m_cbc.fpp | 25 +++--- src/simulation/m_data_output.fpp | 5 +- src/simulation/m_fftw.fpp | 4 +- src/simulation/m_global_parameters.fpp | 104 ++++++++++++++----------- src/simulation/m_hyperelastic.fpp | 6 +- src/simulation/m_hypoelastic.fpp | 8 +- src/simulation/m_ibm.fpp | 6 +- src/simulation/m_mhd.fpp | 4 +- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_qbmm.fpp | 8 +- src/simulation/m_rhs.fpp | 40 +++++----- src/simulation/m_riemann_solvers.fpp | 17 ++-- src/simulation/m_surface_tension.fpp | 6 +- src/simulation/m_time_steppers.fpp | 2 +- src/simulation/m_viscous.fpp | 4 +- src/simulation/m_weno.fpp | 17 ++-- 25 files changed, 177 insertions(+), 156 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index a3460a3950..f793c94e4d 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -21,7 +21,7 @@ module m_boundary_common implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - !$acc declare create(bc_buffers) + $:DECLARE(create=["bc_buffers"]) real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 4c84b6ff8c..1706b848c2 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1,5 +1,6 @@ #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, @@ -25,7 +26,7 @@ module m_mpi_common implicit none integer, private :: err_code, ierr, v_size !< - !$acc declare create(v_size) + $:DECLARE(create=["v_size"]) !! Generic flags used to identify and report MPI errors real(wp), private, allocatable, dimension(:), target :: buff_send !< @@ -38,10 +39,10 @@ module m_mpi_common !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. - !$acc declare create(buff_send, buff_recv) + $:DECLARE(create=["buff_send","buff_recv"]) integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) + $:DECLARE(create=["halo_size","nVars"]) contains diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 89d4e412fe..7725743e78 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -41,7 +41,7 @@ module m_phase_change real(wp) :: A, B, C, D !> @} - !$acc declare create(max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D) + $:DECLARE(create=["max_iter","pCr","TCr","mixM","lp","vp","A","B","C","D"]) contains @@ -88,15 +88,15 @@ contains real(wp) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses real(wp) :: TvF !< total volume fraction - !$acc declare create(pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF) + $:DECLARE(create=["pS","pSOV","pSSL","TS","TSOV","TSSL","TSatOV","TSatSL"]) + $:DECLARE(create=["rhoe","dynE","rhos","rho","rM","m1","m2","MCT","TvF"]) real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok + $:DECLARE(create=["p_infOV","p_infpT","p_infSL","sk","hk","gk","ek","rhok"]) !< Generic loop iterators integer :: i, j, k, l - !$acc declare create(p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok) - ! starting equilibrium solver $:PARALLEL_LOOP(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 4634ee0e34..d75d69528b 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -51,16 +51,16 @@ module m_variables_conversion !! In simulation, gammas, pi_infs, and qvs are already declared in m_global_variables #ifndef MFC_SIMULATION real(wp), allocatable, public, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps) + $:DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) #endif real(wp), allocatable, dimension(:) :: Gs integer, allocatable, dimension(:) :: bubrs real(wp), allocatable, dimension(:, :) :: Res - !$acc declare create(bubrs, Gs, Res) + $:DECLARE(create=["bubrs","Gs","Res"]) integer :: is1b, is2b, is3b, is1e, is2e, is3e - !$acc declare create(is1b, is2b, is3b, is1e, is2e, is3e) + $:DECLARE(create=["is1b","is2b","is3b","is1e","is2e","is3e"]) real(wp), allocatable, dimension(:, :, :), public :: rho_sf !< Scalar density function real(wp), allocatable, dimension(:, :, :), public :: gamma_sf !< Scalar sp. heat ratio function diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 73d1f14fb8..0a03a1b7f9 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -24,41 +24,43 @@ module m_acoustic_src private; public :: s_initialize_acoustic_src, s_precalculate_acoustic_spatial_sources, s_acoustic_src_calculations integer, allocatable, dimension(:) :: pulse, support - !$acc declare create(pulse, support) + $:DECLARE(create=["pulse","support"]) logical, allocatable, dimension(:) :: dipole - !$acc declare create(dipole) + $:DECLARE(create=["dipole"]) real(wp), allocatable, target, dimension(:, :) :: loc_acoustic - !$acc declare create(loc_acoustic) + $:DECLARE(create=["loc_acoustic"]) - real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay - !$acc declare create(mag, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay) + real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency + real(wp), allocatable, dimension(:) :: gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay + $:DECLARE(create=["mag","length","height","wavelength","frequency"]) + $:DECLARE(create=["gauss_sigma_dist","gauss_sigma_time","npulse","dir","delay"]) real(wp), allocatable, dimension(:) :: foc_length, aperture - !$acc declare create(foc_length, aperture) + $:DECLARE(create=["foc_length","aperture"]) real(wp), allocatable, dimension(:) :: element_spacing_angle, element_polygon_ratio, rotate_angle - !$acc declare create(element_spacing_angle, element_polygon_ratio, rotate_angle) + $:DECLARE(create=["element_spacing_angle","element_polygon_ratio","rotate_angle"]) real(wp), allocatable, dimension(:) :: bb_bandwidth, bb_lowest_freq - !$acc declare create(bb_bandwidth, bb_lowest_freq) + $:DECLARE(create=["bb_bandwidth","bb_lowest_freq"]) integer, allocatable, dimension(:) :: num_elements, element_on, bb_num_freq - !$acc declare create(num_elements, element_on, bb_num_freq) + $:DECLARE(create=["num_elements","element_on","bb_num_freq"]) !> @name Acoustic source terms !> @{ real(wp), allocatable, dimension(:, :, :) :: mass_src, e_src real(wp), allocatable, dimension(:, :, :, :) :: mom_src !> @} - !$acc declare create(mass_src, e_src, mom_src) + $:DECLARE(create=["mass_src","e_src","mom_src"]) integer, dimension(:), allocatable :: source_spatials_num_points !< Number of non-zero source grid points for each source - !$acc declare create(source_spatials_num_points) + $:DECLARE(create=["source_spatials_num_points"]) type(source_spatial_type), dimension(:), allocatable :: source_spatials !< Data of non-zero source grid points for each source - !$acc declare create(source_spatials) + $:DECLARE(create=["source_spatials"]) contains diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 4080a7ddf1..17e700fcfe 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -23,7 +23,7 @@ module m_body_forces s_finalize_body_forces_module real(wp), allocatable, dimension(:, :, :) :: rhoM - !$acc declare create(rhoM) + $:DECLARE(create=["rhoM"]) contains diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index b64c28a4ee..11a7c075d2 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -21,7 +21,7 @@ module m_bubbles real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) real(wp) :: k_mw !< Bubble wall properties (Ando 2010) real(wp) :: rho_mw !< Bubble wall properties (Ando 2010) - !$acc declare create(chi_vw, k_mw, rho_mw) + $:DECLARE(create=["chi_vw","k_mw","rho_mw"]) contains diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 84ac15ecb3..fd770ebcf1 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -22,13 +22,13 @@ module m_bubbles_EE real(wp), allocatable, dimension(:, :, :) :: bub_adv_src real(wp), allocatable, dimension(:, :, :, :) :: bub_r_src, bub_v_src, bub_p_src, bub_m_src - !$acc declare create(bub_adv_src, bub_r_src, bub_v_src, bub_p_src, bub_m_src) + $:DECLARE(create=["bub_adv_src","bub_r_src","bub_v_src","bub_p_src","bub_m_src"]) type(scalar_field) :: divu !< matrix for div(u) - !$acc declare create(divu) + $:DECLARE(create=["divu"]) integer, allocatable, dimension(:) :: rs, vs, ms, ps - !$acc declare create(rs, vs, ms, ps) + $:DECLARE(create=["rs","vs","ms","ps"]) contains diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4fa66a2757..af9aa96436 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -33,20 +33,26 @@ module m_bubbles_EL real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius + $:DECLARE(create=["lag_id", "bub_R0", "Rmax_stats", "Rmin_stats"]) + real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) + $:DECLARE(create=["gas_mg", "gas_betaT", "gas_betaC", "bub_dphidt"]) + !(nBub, 1 -> actual val or 2 -> temp val) real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface + $:DECLARE(create=["gas_p", "gas_mv", "intfc_rad", "intfc_vel"]) !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format + $:DECLARE(create=["mtn_pos", "mtn_posPrev", "mtn_vel", "mtn_s"]) !(nBub, 1-> x or 2->y or 3 ->z, time-stage) real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity @@ -54,21 +60,18 @@ module m_bubbles_EL real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity - - !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, & - !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt) + $:DECLARE(create=["intfc_draddt", "intfc_dveldt", "gas_dpdt", "gas_dmvdt", "mtn_dposdt", "mtn_dveldt"]) integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme - !$acc declare create(lag_num_ts) + $:DECLARE(create=["lag_num_ts"]) integer :: nBubs !< Number of bubbles in the local domain real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework integer :: q_beta_idx !< Size of the q_beta vector field - !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) + $:DECLARE(create=["nBubs","Rmax_glb","Rmin_glb","q_beta","q_beta_idx"]) contains diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index dd26870eb8..28fe01bea8 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -49,6 +49,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf + $:DECLARE(create=["q_prim_rsx_vf","q_prim_rsy_vf","q_prim_rsz_vf"]) type(scalar_field), allocatable, dimension(:) :: F_rs_vf, F_src_rs_vf !< @@ -58,6 +59,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: F_rsx_vf, F_src_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsy_vf, F_src_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsz_vf, F_src_rsz_vf !< + $:DECLARE(create=["F_rsx_vf","F_src_rsx_vf","F_rsy_vf","F_src_rsy_vf","F_rsz_vf","F_src_rsz_vf"]) !! There is a CCE bug that is causing some subset of these variables to interfere !! with variables of the same name in m_riemann_solvers.fpp, and giving this versions @@ -68,13 +70,14 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf_l, flux_src_rsx_vf_l !< real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf_l, flux_src_rsy_vf_l real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf_l, flux_src_rsz_vf_l + $:DECLARE(create=["flux_rsx_vf_l","flux_src_rsx_vf_l","flux_rsy_vf_l","flux_src_rsy_vf_l","flux_rsz_vf_l","flux_src_rsz_vf_l"]) real(wp) :: c !< Cell averaged speed of sound real(wp), dimension(2) :: Re !< Cell averaged Reynolds numbers - !$acc declare create(c, Re) + $:DECLARE(create=["c","Re"]) real(wp) :: dpres_ds !< Spatial derivatives in s-dir of pressure - !$acc declare create(dpres_ds) + $:DECLARE(create=["dpres_ds"]) real(wp), allocatable, dimension(:) :: ds !< Cell-width distribution in the s-direction @@ -94,18 +97,21 @@ module m_cbc real(wp), allocatable, dimension(:, :, :) :: pi_coef_y !< Polynomial interpolant coefficients in y-dir real(wp), allocatable, dimension(:, :, :) :: pi_coef_z !< Polynomial interpolant coefficients in z-dir + $:DECLARE(create=["ds","fd_coef_x","fd_coef_y","fd_coef_z","pi_coef_x","pi_coef_y","pi_coef_z"]) + !! The first dimension of the array identifies the polynomial, the !! second dimension identifies the position of its coefficients and the last !! dimension denotes the location of the CBC. type(int_bounds_info) :: is1, is2, is3 !< Indical bounds in the s1-, s2- and s3-directions - !$acc declare create(is1, is2, is3) + $:DECLARE(create=["is1","is2","is3"]) integer :: dj integer :: bcxb, bcxe, bcyb, bcye, bczb, bcze integer :: cbc_dir, cbc_loc integer :: flux_cbc_index - !$acc declare create(dj, bcxb, bcxe, bcyb, bcye, bczb, bcze, cbc_dir, cbc_loc, flux_cbc_index) + $:DECLARE(create=["dj","bcxb","bcxe","bcyb","bcye","bczb","bcze"]) + $:DECLARE(create=["cbc_dir", "cbc_loc","flux_cbc_index"]) !! GRCBC inputs for subsonic inflow and outflow conditions consisting of !! inflow velocities, pressure, density and void fraction as well as @@ -114,14 +120,9 @@ module m_cbc real(wp), allocatable, dimension(:) :: pres_in, pres_out, Del_in, Del_out real(wp), allocatable, dimension(:, :) :: vel_in, vel_out real(wp), allocatable, dimension(:, :) :: alpha_rho_in, alpha_in - !$acc declare create(pres_in, pres_out, Del_in, Del_out) - !$acc declare create(vel_in, vel_out) - !$acc declare create(alpha_rho_in, alpha_in) - - !$acc declare create(q_prim_rsx_vf, q_prim_rsy_vf, q_prim_rsz_vf, F_rsx_vf, F_src_rsx_vf,flux_rsx_vf_l, flux_src_rsx_vf_l, & - !$acc F_rsy_vf, F_src_rsy_vf,flux_rsy_vf_l, flux_src_rsy_vf_l, F_rsz_vf, F_src_rsz_vf,flux_rsz_vf_l, flux_src_rsz_vf_l, & - !$acc ds,fd_coef_x,fd_coef_y,fd_coef_z, & - !$acc pi_coef_x,pi_coef_y,pi_coef_z) + $:DECLARE(create=["pres_in","pres_out","Del_in","Del_out"]) + $:DECLARE(create=["vel_in","vel_out"]) + $:DECLARE(create=["alpha_rho_in","alpha_in"]) contains diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index ca0d7dbb8d..7ff2756709 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -55,13 +55,14 @@ module m_data_output real(wp), allocatable, dimension(:, :, :) :: ccfl_sf !< CCFL stability criterion real(wp), allocatable, dimension(:, :, :) :: Rc_sf !< Rc stability criterion real(wp), public, allocatable, dimension(:, :) :: c_mass - !$acc declare create(icfl_sf, vcfl_sf, ccfl_sf, Rc_sf) + $:DECLARE(create=["icfl_sf","vcfl_sf","ccfl_sf","Rc_sf"]) real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids - !$acc declare create(icfl_max_loc, icfl_max_glb, vcfl_max_loc, vcfl_max_glb, ccfl_max_loc, ccfl_max_glb, Rc_min_loc, Rc_min_glb) + $:DECLARE(create=["icfl_max_loc","icfl_max_glb","vcfl_max_loc","vcfl_max_glb"]) + $:DECLARE(create=["ccfl_max_loc","ccfl_max_glb","Rc_min_loc","Rc_min_glb"]) !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps !> @{ diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 10e063496c..c0cdc226ea 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -47,12 +47,12 @@ module m_fftw !! Filtered complex data in Fourier space #if defined(MFC_OpenACC) - !$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq) + $:DECLARE(create=["real_size","cmplx_size","x_size","batch_size","Nfq"]) real(dp), allocatable, target :: data_real_gpu(:) complex(dp), allocatable, target :: data_cmplx_gpu(:) complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:) -!$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu) + $:DECLARE(create=["data_real_gpu","data_cmplx_gpu","data_fltr_cmplx_gpu"]) #if defined(__PGI) integer :: fwd_plan_gpu, bwd_plan_gpu diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index d0b94f6b7a..348f9d542c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -4,6 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module contains all of the parameters describing the program !! logistics, the computational domain and the simulation algorithm. @@ -53,7 +54,7 @@ module m_global_parameters logical :: cyl_coord integer :: grid_geometry !> @} - !$acc declare create(cyl_coord, grid_geometry) + $:DECLARE(create=["cyl_coord","grid_geometry"]) !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ @@ -76,7 +77,7 @@ module m_global_parameters real(wp) :: dt !< Size of the time-step - !$acc declare create(x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p) + $:DECLARE(create=["x_cb","y_cb","z_cb","x_cc","y_cc","z_cc","dx","dy","dz","dt","m","n","p"]) !> @name Starting time-step iteration, stopping time-step iteration and the number !! of time-step iterations between successive solution backups, respectively @@ -90,7 +91,7 @@ module m_global_parameters real(wp) :: t_stop, t_save, cfl_target integer :: n_start !> @} - !$acc declare create(cfl_target) + $:DECLARE(create=["cfl_target"]) logical :: cfl_adap_dt, cfl_const_dt, cfl_dt @@ -158,7 +159,7 @@ module m_global_parameters logical :: bulk_stress !< Bulk stresses logical :: cont_damage !< Continuum damage modeling - !$acc declare create(chemistry) + $:DECLARE(create=["chemistry"]) logical :: bodyForces logical :: bf_x, bf_y, bf_z !< body force toggle in three directions @@ -169,24 +170,27 @@ module m_global_parameters #:endfor #:endfor real(wp), dimension(3) :: accel_bf - !$acc declare create(accel_bf) + $:DECLARE(create=["accel_bf"]) integer :: cpu_start, cpu_end, cpu_rate #:if not MFC_CASE_OPTIMIZATION - !$acc declare create(num_dims, num_vels, weno_polyn, weno_order, weno_num_stencils, num_fluids, wenojs, mapped_weno, wenoz, teno, wenoz_q, mhd, relativity) + $:DECLARE(create=["num_dims","num_vels","weno_polyn","weno_order"]) + $:DECLARE(create=["weno_num_stencils","num_fluids","wenojs"]) + $:DECLARE(create=["mapped_weno", "wenoz","teno","wenoz_q","mhd","relativity"]) #:endif - !$acc declare create(mpp_lim, model_eqns, mixture_err, alt_soundspeed, avg_state, mp_weno, weno_eps, teno_CT, hypoelasticity, hyperelasticity, hyper_model, elasticity, low_Mach, viscous, shear_stress, bulk_stress, cont_damage) + $:DECLARE(create=["mpp_lim","model_eqns","mixture_err","alt_soundspeed"]) + $:DECLARE(create=["avg_state","mp_weno","weno_eps","teno_CT","hypoelasticity"]) + $:DECLARE(create=["hyperelasticity","hyper_model","elasticity","low_Mach"]) + $:DECLARE(create=["viscous","shear_stress","bulk_stress","cont_damage"]) logical :: relax !< activate phase change integer :: relax_model !< Relaxation model real(wp) :: palpha_eps !< trigger parameter for the p relaxation procedure, phase change model real(wp) :: ptgalpha_eps !< trigger parameter for the pTg relaxation procedure, phase change model -!#ifndef _CRAYFTN -!$acc declare create(relax, relax_model, palpha_eps,ptgalpha_eps) -!#endif + $:DECLARE(create=["relax", "relax_model", "palpha_eps","ptgalpha_eps"]) integer :: num_bc_patches logical :: bc_io @@ -196,6 +200,10 @@ module m_global_parameters !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} + $:DECLARE(create=["bc_x%vb1", "bc_x%vb2", "bc_x%vb3", "bc_x%ve1", "bc_x%ve2", "bc_x%ve3"]) + $:DECLARE(create=["bc_y%vb1", "bc_y%vb2", "bc_y%vb3", "bc_y%ve1", "bc_y%ve2", "bc_y%ve3"]) + $:DECLARE(create=["bc_z%vb1", "bc_z%vb2", "bc_z%vb3", "bc_z%ve1", "bc_z%ve2", "bc_z%ve3"]) + type(bounds_info) :: x_domain, y_domain, z_domain real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b @@ -248,19 +256,20 @@ module m_global_parameters integer :: c_idx !< Index of color function integer :: damage_idx !< Index of damage state variable (D) for continuum damage model !> @} - - !$acc declare create(bub_idx) + $:DECLARE(create=["sys_size","E_idx","n_idx","bub_idx","alf_idx","gamma_idx"]) + $:DECLARE(create=["pi_inf_idx","B_idx","stress_idx","xi_idx","b_size"]) + $:DECLARE(create=["tensor_size","species_idx","c_idx"]) ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). ! Stands for "InDices With INTerior". type(int_bounds_info) :: idwint(1:3) - !$acc declare create(idwint) + $:DECLARE(create=["idwint"]) ! Cell Indices for the entire (local) domain. In simulation and post_process, ! this includes the buffer region. idwbuff and idwint are the same otherwise. ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) - !$acc declare create(idwbuff) + $:DECLARE(create=["idwbuff"]) !> @name The number of fluids, along with their identifying indexes, respectively, !! for which viscous effects, e.g. the shear and/or the volume Reynolds (Re) @@ -270,7 +279,7 @@ module m_global_parameters integer, allocatable, dimension(:, :) :: Re_idx !> @} - !$acc declare create(Re_size, Re_idx) + $:DECLARE(create=["Re_size","Re_idx"]) ! The WENO average (WA) flag regulates whether the calculation of any cell- ! average spatial derivatives is carried out in each cell by utilizing the @@ -281,7 +290,7 @@ module m_global_parameters real(wp) :: wa_flg !> @{ - !$acc declare create(wa_flg) + $:DECLARE(create=["wa_flg"]) !> @name The coordinate direction indexes and flags (flg), respectively, for which !! the configurations will be determined with respect to a working direction @@ -293,14 +302,15 @@ module m_global_parameters integer, dimension(3) :: dir_idx_tau !!used for hypoelasticity=true !> @} - !$acc declare create(dir_idx, dir_flg, dir_idx_tau) + $:DECLARE(create=["dir_idx","dir_flg","dir_idx_tau"]) integer :: buff_size !< !! The number of cells that are necessary to be able to store enough boundary !! conditions data to march the solution in the physical computational domain !! to the next time-step. - !$acc declare create(sys_size, buff_size, E_idx, gamma_idx, pi_inf_idx, alf_idx, n_idx, stress_idx, b_size, tensor_size, xi_idx, species_idx, B_idx, c_idx) + $:DECLARE(create=["buff_size"]) + integer :: shear_num !! Number of shear stress components integer, dimension(3) :: shear_indices !< @@ -311,7 +321,7 @@ module m_global_parameters !! Indices of shear stress components to reflect for boundary conditions. !! Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, [indices]) - !$acc declare create(shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices) + $:DECLARE(create=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) ! END: Simulation Algorithm Parameters @@ -322,10 +332,6 @@ module m_global_parameters !! in the flow. These include the stiffened gas equation of state parameters, !! the Reynolds numbers and the Weber numbers. - !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) - !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) - !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) - integer :: fd_order !< !! The order of the finite-difference (fd) approximations of the first-order !! derivatives that need to be evaluated when the CoM or flow probe data @@ -335,7 +341,7 @@ module m_global_parameters !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, !! it is a measure of the half-size of the finite-difference stencil for the !! selected order of accuracy. - !$acc declare create(fd_order,fd_number) + $:DECLARE(create=["fd_order","fd_number"]) logical :: probe_wrt logical :: integral_wrt @@ -348,7 +354,7 @@ module m_global_parameters !> @{ real(wp) :: rhoref, pref !> @} - !$acc declare create(rhoref, pref) + $:DECLARE(create=["rhoref","pref"]) !> @name Immersed Boundaries !> @{ @@ -363,7 +369,7 @@ module m_global_parameters !! the maximum allowable number of patches, num_patches_max, may be changed !! in the module m_derived_types.f90. - !$acc declare create(ib, num_ibs, patch_ib) + $:DECLARE(create=["ib","num_ibs","patch_ib"]) !> @} !> @name Bubble modeling @@ -378,26 +384,31 @@ module m_global_parameters real(wp) :: Ca !< Cavitation number real(wp) :: Web !< Weber number real(wp) :: Re_inv !< Inverse Reynolds number + $:DECLARE(create=["R0ref","Ca","Web","Re_inv"]) real(wp), dimension(:), allocatable :: weight !< Simpson quadrature weights real(wp), dimension(:), allocatable :: R0 !< Bubble sizes real(wp), dimension(:), allocatable :: V0 !< Bubble velocities - !$acc declare create(weight, R0, V0) + $:DECLARE(create=["weight","R0","V0"]) logical :: bubbles_euler !< Bubbles euler on/off logical :: polytropic !< Polytropic switch logical :: polydisperse !< Polydisperse bubbles + $:DECLARE(create=["bubbles_euler","polytropic","polydisperse"]) + logical :: adv_n !< Solve the number density equation and compute alpha from number density logical :: adap_dt !< Adaptive step size control real(wp) :: adap_dt_tol !< Tolerance to control adaptive step size + $:DECLARE(create=["adv_n","adap_dt","adap_dt_tol"]) integer :: bubble_model !< Gilmore or Keller--Miksis bubble model integer :: thermal !< Thermal behavior. 1 = adiabatic, 2 = isotherm, 3 = transfer + $:DECLARE(create=["bubble_model","thermal"]) real(wp), allocatable, dimension(:, :, :) :: ptil !< Pressure modification - !$acc declare create(ptil) real(wp) :: poly_sigma !< log normal sigma for polydisperse PDF + $:DECLARE(create=["ptil", "poly_sigma"]) logical :: qbmm !< Quadrature moment method integer, parameter :: nmom = 6 !< Number of carried moments per R0 location @@ -406,38 +417,39 @@ module m_global_parameters integer :: R0_type real(wp) :: pi_fac !< Factor for artificial pi_inf + $:DECLARE(create=["qbmm", "nmomsp","nmomtot","R0_type","pi_fac"]) #:if not MFC_CASE_OPTIMIZATION - !$acc declare create(nb) + $:DECLARE(create=["nb"]) #:endif - !$acc declare create(R0ref, Ca, Web, Re_inv, bubbles_euler, polytropic, polydisperse, qbmm, nmomsp, nmomtot, R0_type, bubble_model, thermal, poly_sigma, adv_n, adap_dt, adap_dt_tol, pi_fac) - type(scalar_field), allocatable, dimension(:) :: mom_sp type(scalar_field), allocatable, dimension(:, :, :) :: mom_3d - !$acc declare create(mom_sp, mom_3d) + $:DECLARE(create=["mom_sp","mom_3d"]) !> @} type(chemistry_parameters) :: chem_params - !$acc declare create(chem_params) + $:DECLARE(create=["chem_params"]) !> @name Physical bubble parameters (see Ando 2010, Preston 2007) !> @{ real(wp) :: R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v - !$acc declare create(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v) + $:DECLARE(create=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw",]) + $:DECLARE(create=["pv","M_n", "M_v","k_vl","k_nl","cp_n","cp_v"]) real(wp), dimension(:), allocatable :: k_n, k_v, pb0, mass_n0, mass_v0, Pe_T real(wp), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN - !$acc declare create( k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN) + $:DECLARE(create=["k_n","k_v","pb0","mass_n0","mass_v0","Pe_T"]) + $:DECLARE(create=["Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN"]) real(wp) :: mul0, ss, gamma_v, mu_v real(wp) :: gamma_m, gamma_n, mu_n real(wp) :: gam !> @} - !$acc declare create(mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam) + $:DECLARE(create=["mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) !> @name Acoustic acoustic_source parameters !> @{ @@ -445,14 +457,14 @@ module m_global_parameters type(acoustic_parameters), dimension(num_probes_max) :: acoustic !< Acoustic source parameters integer :: num_source !< Number of acoustic sources !> @} - !$acc declare create(acoustic_source, acoustic, num_source) + $:DECLARE(create=["acoustic_source","acoustic","num_source"]) !> @name Surface tension parameters !> @{ real(wp) :: sigma logical :: surface_tension - !$acc declare create(sigma, surface_tension) + $:DECLARE(create=["sigma","surface_tension"]) !> @} integer :: momxb, momxe @@ -463,11 +475,13 @@ module m_global_parameters integer :: strxb, strxe integer :: chemxb, chemxe integer :: xibeg, xiend - !$acc declare create(momxb, momxe, advxb, advxe, contxb, contxe, intxb, intxe, bubxb, bubxe, strxb, strxe, chemxb, chemxe) - !$acc declare create(xibeg,xiend) + $:DECLARE(create=["momxb","momxe","advxb","advxe","contxb","contxe"]) + $:DECLARE(create=["intxb","intxe", "bubxb","bubxe"]) + $:DECLARE(create=["strxb","strxe","chemxb","chemxe"]) + $:DECLARE(create=["xibeg","xiend"]) real(wp), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps) + $:DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) real(wp) :: mytime !< Current simulation time real(wp) :: finaltime !< Final simulation time @@ -478,25 +492,25 @@ module m_global_parameters type(pres_field), allocatable, dimension(:) :: mv_ts - !$acc declare create(pb_ts, mv_ts) + $:DECLARE(create=["pb_ts","mv_ts"]) !> @name lagrangian subgrid bubble parameters !> @{! logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters - !$acc declare create(bubbles_lagrange, lag_params) + $:DECLARE(create=["bubbles_lagrange","lag_params"]) !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) logical :: powell !< Powell‐correction for div B = 0 - !$acc declare create(Bx0, powell) + $:DECLARE(create=["Bx0","powell"]) !> @name Continuum damage model parameters !> @{! real(wp) :: tau_star !< Stress threshold for continuum damage modeling real(wp) :: cont_damage_s !< Exponent s for continuum damage modeling real(wp) :: alpha_bar !< Damage rate factor for continuum damage modeling - !$acc declare create(tau_star, cont_damage_s, alpha_bar) + $:DECLARE(create=["tau_star","cont_damage_s","alpha_bar"]) !> @} contains diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 43581328b7..11091627e8 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -27,14 +27,14 @@ module m_hyperelastic !! The btensor at the cell-interior Gaussian quadrature points. !! These tensor is needed to be calculated once and make the code DRY. type(vector_field) :: btensor !< - !$acc declare create(btensor) + $:DECLARE(create=["btensor"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x real(wp), allocatable, dimension(:, :) :: fd_coeff_y real(wp), allocatable, dimension(:, :) :: fd_coeff_z - !$acc declare create(fd_coeff_x,fd_coeff_y,fd_coeff_z) + $:DECLARE(create=["fd_coeff_x","fd_coeff_y", "fd_coeff_z"]) real(wp), allocatable, dimension(:) :: Gs - !$acc declare create(Gs) + $:DECLARE(create=["Gs"]) contains diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 4c1a101e49..430b69d5e5 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -21,20 +21,20 @@ module m_hypoelastic s_compute_damage_state real(wp), allocatable, dimension(:) :: Gs - !$acc declare create(Gs) + $:DECLARE(create=["Gs"]) real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - !$acc declare create(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz) + $:DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) real(wp), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field - !$acc declare create(rho_K_field, G_K_field) + $:DECLARE(create=["rho_K_field","G_K_field"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - !$acc declare create(fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h) + $:DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) contains diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index d118a2fa55..a3538b2a7e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -36,15 +36,15 @@ module m_ibm type(integer_field), public :: ib_markers type(levelset_field), public :: levelset type(levelset_norm_field), public :: levelset_norm - !$acc declare create(ib_markers, levelset, levelset_norm) + $:DECLARE(create=["ib_markers","levelset","levelset_norm"]) type(ghost_point), dimension(:), allocatable :: ghost_points type(ghost_point), dimension(:), allocatable :: inner_points - !$acc declare create(ghost_points, inner_points) + $:DECLARE(create=["ghost_points","inner_points"]) integer :: num_gps !< Number of ghost points integer :: num_inner_gps !< Number of ghost points - !$acc declare create(gp_layers, num_gps, num_inner_gps) + $:DECLARE(create=["gp_layers","num_gps","num_inner_gps"]) contains diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index a096a4fe04..a32412398c 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -25,12 +25,12 @@ module m_mhd real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - !$acc declare create(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz) + $:DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - !$acc declare create(fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h) + $:DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) contains diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f931227f4d..f7c05cd07f 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -4,6 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' +#:include 'directive_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, @@ -46,7 +47,7 @@ module m_mpi_proxy !> @{ integer, private :: err_code, ierr, v_size !> @} - !$acc declare create(v_size) + $:DECLARE(create=["v_size"]) contains diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 075334a2d0..863324b71d 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -26,21 +26,21 @@ module m_qbmm private; public :: s_initialize_qbmm_module, s_mom_inv, s_coeff, s_compute_qbmm_rhs real(wp), allocatable, dimension(:, :, :, :, :) :: momrhs - !$acc declare create(momrhs) + $:DECLARE(create=["momrhs"]) #:if MFC_CASE_OPTIMIZATION integer, parameter :: nterms = ${nterms}$ #:else integer :: nterms - !$acc declare create(nterms) + $:DECLARE(create=["nterms"]) #:endif type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm - !$acc declare create(is1_qbmm, is2_qbmm, is3_qbmm) + $:DECLARE(create=["is1_qbmm","is2_qbmm","is3_qbmm"]) integer, allocatable, dimension(:) :: bubrs integer, allocatable, dimension(:, :) :: bubmoms - !$acc declare create(bubrs, bubmoms) + $:DECLARE(create=["bubrs","bubmoms"]) contains diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 9f96010e56..54899dc0fa 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -75,13 +75,13 @@ module m_rhs !! conservative variables, which are located in q_cons_vf, at cell-interior !! Gaussian quadrature points (QP). type(vector_field) :: q_cons_qp !< - !$acc declare create(q_cons_qp) + $:DECLARE(create=["q_cons_qp"]) !! The primitive variables at cell-interior Gaussian quadrature points. These !! are calculated from the conservative variables and gradient magnitude (GM) !! of the volume fractions, q_cons_qp and gm_alpha_qp, respectively. type(vector_field) :: q_prim_qp !< - !$acc declare create(q_prim_qp) + $:DECLARE(create=["q_prim_qp"]) !> @name The first-order spatial derivatives of the primitive variables at cell- !! interior Gaussian quadrature points. These are WENO-reconstructed from @@ -90,7 +90,7 @@ module m_rhs !! of the primitive variables, located in qK_prim_n, where K = L or R. !> @{ type(vector_field), allocatable, dimension(:) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp - !$acc declare create(dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp) + $:DECLARE(create=["dq_prim_dx_qp","dq_prim_dy_qp","dq_prim_dz_qp"]) !> @} !> @name The left and right WENO-reconstructed cell-boundary values of the cell- @@ -100,26 +100,26 @@ module m_rhs !> @{ type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n - !$acc declare create(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n) - !$acc declare create(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n) + $:DECLARE(create=["dqL_prim_dx_n","dqL_prim_dy_n","dqL_prim_dz_n"]) + $:DECLARE(create=["dqR_prim_dx_n","dqR_prim_dy_n","dqR_prim_dz_n"]) !> @} type(scalar_field), allocatable, dimension(:) :: tau_Re_vf - !$acc declare create(tau_Re_vf) + $:DECLARE(create=["tau_Re_vf"]) type(vector_field) :: gm_alpha_qp !< !! The gradient magnitude of the volume fractions at cell-interior Gaussian !! quadrature points. gm_alpha_qp is calculated from individual first-order !! spatial derivatives located in dq_prim_ds_qp. - !$acc declare create(gm_alpha_qp) + $:DECLARE(create=["gm_alpha_qp"]) !> @name The left and right WENO-reconstructed cell-boundary values of the cell- !! average gradient magnitude of volume fractions, located in gm_alpha_qp. !> @{ type(vector_field), allocatable, dimension(:) :: gm_alphaL_n type(vector_field), allocatable, dimension(:) :: gm_alphaR_n - !$acc declare create(gm_alphaL_n, gm_alphaR_n) + $:DECLARE(create=["gm_alphaL_n","gm_alphaR_n"]) !> @} !> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical @@ -129,44 +129,44 @@ module m_rhs type(vector_field), allocatable, dimension(:) :: flux_n type(vector_field), allocatable, dimension(:) :: flux_src_n type(vector_field), allocatable, dimension(:) :: flux_gsrc_n - !$acc declare create(flux_n, flux_src_n, flux_gsrc_n) + $:DECLARE(create=["flux_n","flux_src_n","flux_gsrc_n"]) !> @} type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim - !$acc declare create(qL_prim, qR_prim) + $:DECLARE(create=["qL_prim","qR_prim"]) type(int_bounds_info) :: iv !< Vector field indical bounds - !$acc declare create(iv) + $:DECLARE(create=["iv"]) !> @name Indical bounds in the x-, y- and z-directions !> @{ type(int_bounds_info) :: irx, iry, irz - !$acc declare create(irx, iry, irz) + $:DECLARE(create=["irx","iry","irz"]) type(int_bounds_info) :: is1, is2, is3 - !$acc declare create(is1, is2, is3) + $:DECLARE(create=["is1","is2","is3"]) !> @name Saved fluxes for testing !> @{ type(scalar_field) :: alf_sum !> @} - !$acc declare create(alf_sum) + $:DECLARE(create=["alf_sum"]) real(wp), allocatable, dimension(:, :, :) :: blkmod1, blkmod2, alpha1, alpha2, Kterm real(wp), allocatable, dimension(:, :, :, :) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf real(wp), allocatable, dimension(:, :, :, :) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf - !$acc declare create(blkmod1, blkmod2, alpha1, alpha2, Kterm) - !$acc declare create(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf) - !$acc declare create(dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf) + $:DECLARE(create=["blkmod1","blkmod2","alpha1","alpha2","Kterm"]) + $:DECLARE(create=["qL_rsx_vf","qL_rsy_vf","qL_rsz_vf","qR_rsx_vf","qR_rsy_vf","qR_rsz_vf"]) + $:DECLARE(create=["dqL_rsx_vf","dqL_rsy_vf","dqL_rsz_vf","dqR_rsx_vf","dqR_rsy_vf","dqR_rsz_vf"]) real(wp), allocatable, dimension(:) :: gamma_min, pres_inf - !$acc declare create(gamma_min, pres_inf) + $:DECLARE(create=["gamma_min","pres_inf"]) real(wp), allocatable, dimension(:, :) :: Res - !$acc declare create(Res) + $:DECLARE(create=["Res"]) real(wp), allocatable, dimension(:, :, :) :: nbub !< Bubble number density - !$acc declare create(nbub) + $:DECLARE(create=["nbub"]) contains diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index a6d1c80a77..a593a36c71 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -66,8 +66,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf - !$acc declare create( flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf, & - !$acc flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf ) + $:DECLARE(create=["flux_rsx_vf","flux_src_rsx_vf","flux_rsy_vf","flux_src_rsy_vf","flux_rsz_vf","flux_src_rsz_vf"]) !> @} !> The cell-boundary values of the geometrical source flux that are computed @@ -78,7 +77,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsz_vf !< - !$acc declare create( flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf ) + $:DECLARE(create=["flux_gsrc_rsx_vf","flux_gsrc_rsy_vf","flux_gsrc_rsz_vf"]) !> @} ! The cell-boundary values of the velocity. vel_src_rs_vf is determined as @@ -87,17 +86,17 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsz_vf - !$acc declare create(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf) + $:DECLARE(create=["vel_src_rsx_vf","vel_src_rsy_vf","vel_src_rsz_vf"]) real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsz_vf - !$acc declare create(mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf) + $:DECLARE(create=["mom_sp_rsx_vf","mom_sp_rsy_vf","mom_sp_rsz_vf"]) real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsz_vf - !$acc declare create(Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf) + $:DECLARE(create=["Re_avg_rsx_vf","Re_avg_rsy_vf","Re_avg_rsz_vf"]) !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ @@ -105,13 +104,13 @@ module m_riemann_solvers type(int_bounds_info) :: isx, isy, isz !> @} - !$acc declare create(is1, is2, is3, isx, isy, isz) + $:DECLARE(create=["is1","is2","is3","isx","isy","isz"]) real(wp), allocatable, dimension(:) :: Gs - !$acc declare create(Gs) + $:DECLARE(create=["Gs"]) real(wp), allocatable, dimension(:, :) :: Res - !$acc declare create(Res) + $:DECLARE(create=["Res"]) contains diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index a3be874f86..4ab6c23a7c 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -30,16 +30,16 @@ module m_surface_tension !> @{ type(scalar_field), allocatable, dimension(:) :: c_divs !> @) - !$acc declare create(c_divs) + $:DECLARE(create=["c_divs"]) !> @name cell boundary reconstructed gradient components and magnitude !> @{ real(wp), allocatable, dimension(:, :, :, :) :: gL_x, gR_x, gL_y, gR_y, gL_z, gR_z !> @} - !$acc declare create(gL_x, gR_x, gL_y, gR_y, gL_z, gR_z) + $:DECLARE(create=["gL_x","gR_x","gL_y","gR_y","gL_z","gR_z"]) type(int_bounds_info) :: is1, is2, is3, iv - !$acc declare create(is1, is2, is3, iv) + $:DECLARE(create=["is1","is2","is3","iv"]) contains diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 7c1c986ac9..b19600c6fd 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -76,7 +76,7 @@ module m_time_steppers integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme - !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, q_prim_ts, rhs_mv, rhs_pb, max_dt) + $:DECLARE(create=["q_cons_ts","q_prim_vf","q_T_sf","rhs_vf","q_prim_ts","rhs_mv","rhs_pb","max_dt"]) contains diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 84eb8a4329..323444c1ee 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -25,10 +25,10 @@ module m_viscous type(int_bounds_info) :: iv type(int_bounds_info) :: is1_viscous, is2_viscous, is3_viscous - !$acc declare create(is1_viscous, is2_viscous, is3_viscous, iv) + $:DECLARE(create=["is1_viscous","is2_viscous","is3_viscous","iv"]) real(wp), allocatable, dimension(:, :) :: Res_viscous - !$acc declare create(Res_viscous) + $:DECLARE(create=["Res_viscous"]) contains diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 830c852c83..9c9778a405 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -43,6 +43,7 @@ module m_weno !> @{ real(wp), allocatable, dimension(:, :, :, :) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z !> @} + $:DECLARE(create=["v_rs_ws_x","v_rs_ws_y","v_rs_ws_z"]) ! WENO Coefficients @@ -59,6 +60,8 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_y real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_z !> @} + $:DECLARE(create=["poly_coef_cbL_x","poly_coef_cbL_y","poly_coef_cbL_z"]) + $:DECLARE(create=["poly_coef_cbR_x","poly_coef_cbR_y","poly_coef_cbR_z"]) !> @name The ideal weights at the left and the right cell-boundaries and at the !! left and the right quadrature points, in x-, y- and z-directions. Note @@ -73,6 +76,7 @@ module m_weno real(wp), target, allocatable, dimension(:, :) :: d_cbR_y real(wp), target, allocatable, dimension(:, :) :: d_cbR_z !> @} + $:DECLARE(create=["d_cbL_x","d_cbL_y","d_cbL_z","d_cbR_x","d_cbR_y","d_cbR_z"]) !> @name Smoothness indicator coefficients in the x-, y-, and z-directions. Note !! that the first array dimension identifies the smoothness indicator, the @@ -83,27 +87,22 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_y real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_z !> @} + $:DECLARE(create=["beta_coef_x","beta_coef_y","beta_coef_z"]) ! END: WENO Coefficients integer :: v_size !< Number of WENO-reconstructed cell-average variables - !$acc declare create(v_size) + $:DECLARE(create=["v_size"]) !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ type(int_bounds_info) :: is1_weno, is2_weno, is3_weno - !$acc declare create(is1_weno, is2_weno, is3_weno) + $:DECLARE(create=["is1_weno","is2_weno","is3_weno"]) ! !> @} real(wp) :: test - !$acc declare create(test) - - !$acc declare create( & - !$acc v_rs_ws_x, v_rs_ws_y, v_rs_ws_z, & - !$acc poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z, & - !$acc poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z,d_cbL_x, & - !$acc d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z,beta_coef_x,beta_coef_y,beta_coef_z) + $:DECLARE(create=["test"]) contains From bb0d2ed432753d1652a1f4094e58ae409c117263 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 13:42:59 -0400 Subject: [PATCH 19/75] Added update device macros --- src/common/m_mpi_common.fpp | 8 ++-- src/common/m_variables_conversion.fpp | 8 ++-- src/simulation/m_acoustic_src.fpp | 15 ++++--- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_boundary_conditions.fpp | 9 +++-- src/simulation/m_bubbles_EE.fpp | 4 +- src/simulation/m_bubbles_EL.fpp | 10 ++--- src/simulation/m_cbc.fpp | 24 ++++++----- src/simulation/m_fftw.fpp | 6 +-- src/simulation/m_global_parameters.fpp | 51 ++++++++++++++---------- src/simulation/m_hyperelastic.fpp | 8 ++-- src/simulation/m_hypoelastic.fpp | 8 ++-- src/simulation/m_ibm.fpp | 14 +++---- src/simulation/m_mhd.fpp | 6 +-- src/simulation/m_mpi_proxy.fpp | 12 +++--- src/simulation/m_qbmm.fpp | 10 ++--- src/simulation/m_rhs.fpp | 8 ++-- src/simulation/m_riemann_solvers.fpp | 12 +++--- src/simulation/m_sim_helpers.fpp | 6 +-- src/simulation/m_start_up.fpp | 45 ++++++++++++--------- src/simulation/m_surface_tension.fpp | 2 +- src/simulation/m_time_steppers.fpp | 2 +- src/simulation/m_viscous.fpp | 18 ++++----- src/simulation/m_weno.fpp | 10 ++--- 24 files changed, 162 insertions(+), 136 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 1706b848c2..c580281ef2 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -643,7 +643,7 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -!$acc update device(v_size) +$:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then @@ -878,7 +878,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) + $:UPDATE(device=["buff_recv"]) call nvtxEndRange #:endif end if @@ -1093,7 +1093,7 @@ contains #ifdef MFC_MPI nVars = num_dims + 1 - !$acc update device(nVars) + $:UPDATE(device=["nVars"]) buffer_counts = (/ & buff_size*nVars*(n + 1)*(p + 1), & @@ -1208,7 +1208,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) + $:UPDATE(device=["buff_recv"]) call nvtxEndRange #:endif end if diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index d75d69528b..7c49767723 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -653,7 +653,7 @@ contains qvs(i) = fluid_pp(i)%qv qvps(i) = fluid_pp(i)%qvp end do -!$acc update device(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs) +$:UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) #ifdef MFC_SIMULATION @@ -665,7 +665,7 @@ contains end do end do - !$acc update device(Res, Re_idx, Re_size) + $:UPDATE(device=["Res","Re_idx","Re_size"]) end if #endif @@ -679,7 +679,7 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - !$acc update device(bubrs) + $:UPDATE(device=["bubrs"]) end if #ifdef MFC_POST_PROCESS @@ -1476,7 +1476,7 @@ contains is2b = is2%beg; is2e = is2%end is3b = is3%beg; is3e = is3%end - !$acc update device(is1b, is2b, is3b, is1e, is2e, is3e) + $:UPDATE(device=["is1b","is2b","is3b","is1e","is2e","is3e"]) ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 0a03a1b7f9..a779c0e5d5 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -111,7 +111,12 @@ contains delay(i) = acoustic(i)%delay end if end do - !$acc update device(loc_acoustic, mag, dipole, support, length, height, wavelength, frequency, gauss_sigma_dist, gauss_sigma_time, foc_length, aperture, npulse, pulse, dir, delay, element_polygon_ratio, rotate_angle, element_spacing_angle, num_elements, element_on, bb_num_freq, bb_bandwidth, bb_lowest_freq) + $:UPDATE(device=["loc_acoustic","mag","dipole","support","length", & + & "height","wavelength","frequency","gauss_sigma_dist", & + & "gauss_sigma_time","foc_length","aperture","npulse","pulse", & + & "dir","delay","element_polygon_ratio","rotate_angle", & + & "element_spacing_angle","num_elements","element_on", & + & "bb_num_freq","bb_bandwidth","bb_lowest_freq"]) @:ALLOCATE(mass_src(0:m, 0:n, 0:p)) @:ALLOCATE(mom_src(1:num_vels, 0:m, 0:n, 0:p)) @@ -469,14 +474,14 @@ contains call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials') end if - !$acc update device(source_spatials(ai)%coord) - !$acc update device(source_spatials(ai)%val) + $:UPDATE(device=["source_spatials(ai)%coord"]) + $:UPDATE(device=["source_spatials(ai)%val"]) if (support(ai) >= 5) then if (dim == 2) then - !$acc update device(source_spatials(ai)%angle) + $:UPDATE(device=["source_spatials(ai)%angle"]) end if if (dim == 3) then - !$acc update device(source_spatials(ai)%xyz_to_r_ratios) + $:UPDATE(device=["source_spatials(ai)%xyz_to_r_ratios"]) end if end if diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 17e700fcfe..b0d7cda444 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -68,7 +68,7 @@ contains end if end if - !$acc update device(accel_bf) + $:UPDATE(device=["accel_bf"]) end subroutine s_compute_acceleration diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index d0666e2ca7..ad50b559ac 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -2,6 +2,7 @@ ! @brief Contains module m_boundary_conditions !> @brief This module contains +#:include 'directive_macros.fpp' module m_boundary_conditions use m_derived_types @@ -39,7 +40,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) + $:UPDATE(device=["bc_type(dir, loc)%sf"]) end do end do close (1) @@ -55,7 +56,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) + $:UPDATE(device=["bc_buffers(dir, loc)%sf"]) end do end do close (1) @@ -103,7 +104,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) + $:UPDATE(device=["bc_type(dir, loc)%sf"]) end do end do @@ -113,7 +114,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) + $:UPDATE(device=["bc_buffers(dir, loc)%sf"]) end do end do diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index fd770ebcf1..714f95913c 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -52,9 +52,9 @@ contains end if end do - !$acc update device(rs, vs) + $:UPDATE(device=["rs", "vs"]) if (.not. polytropic) then - !$acc update device(ps, ms) + $:UPDATE(device=["ps", "ms"]) end if @:ALLOCATE(divu%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index af9aa96436..7f9d63c994 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -101,7 +101,7 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if - !$acc update device(lag_num_ts, q_beta_idx) + $:UPDATE(device=["lag_num_ts", "q_beta_idx"]) @:ALLOCATE(q_beta%vf(1:q_beta_idx)) @@ -249,7 +249,7 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - !$acc update device(bubbles_lagrange, lag_params) + $:UPDATE(device=["bubbles_lagrange", "lag_params"]) !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & @@ -257,9 +257,9 @@ contains Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) - !$acc update device(Rmax_glb, Rmin_glb) + $:UPDATE(device=["Rmax_glb", "Rmin_glb"]) - !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + $:UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) !Populate temporal variables call s_transfer_data_to_tmp() @@ -1676,7 +1676,7 @@ contains end do nBubs = nBubs - 1 - !$acc update device(nBubs) + $:UPDATE(device=["nBubs"]) end subroutine s_remove_lag_bubble diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 28fe01bea8..e306717513 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -139,7 +139,7 @@ contains else flux_cbc_index = adv_idx%end end if - !$acc update device(flux_cbc_index) + $:UPDATE(device=["flux_cbc_index"]) call s_any_cbc_boundaries(is_cbc) @@ -389,7 +389,8 @@ contains end if - !$acc update device(fd_coef_x, fd_coef_y, fd_coef_z, pi_coef_x, pi_coef_y, pi_coef_z) + $:UPDATE(device=["fd_coef_x","fd_coef_y","fd_coef_z", & + & "pi_coef_x","pi_coef_y","pi_coef_z"]) ! Associating the procedural pointer to the appropriate subroutine ! that will be utilized in the conversion to the mixture variables @@ -397,20 +398,20 @@ contains bcxb = bc_x%beg bcxe = bc_x%end - !$acc update device(bcxb, bcxe) + $:UPDATE(device=["bcxb", "bcxe"]) if (n > 0) then bcyb = bc_y%beg bcye = bc_y%end - !$acc update device(bcyb, bcye) + $:UPDATE(device=["bcyb", "bcye"]) end if if (p > 0) then bczb = bc_z%beg bcze = bc_z%end - !$acc update device(bczb, bcze) + $:UPDATE(device=["bczb", "bcze"]) end if ! Allocate GRCBC inputs @@ -442,7 +443,8 @@ contains end do end if #:endfor - !$acc update device(vel_in, vel_out, pres_in, pres_out, Del_in, Del_out, alpha_rho_in, alpha_in) + $:UPDATE(device=["vel_in","vel_out","pres_in","pres_out", & + & "Del_in","Del_out","alpha_rho_in","alpha_in"]) end subroutine s_initialize_cbc_module @@ -606,7 +608,7 @@ contains end if - !$acc update device(ds) + $:UPDATE(device=["ds"]) end subroutine s_associate_cbc_coefficients_pointers @@ -682,7 +684,7 @@ contains cbc_dir = cbc_dir_norm cbc_loc = cbc_loc_norm - !$acc update device(cbc_dir, cbc_loc) + $:UPDATE(device=["cbc_dir", "cbc_loc"]) call s_initialize_cbc(q_prim_vf, flux_vf, flux_src_vf, & ix, iy, iz) @@ -1163,8 +1165,8 @@ contains end if dj = max(0, cbc_loc) - !$acc update device(is1, is2, is3, dj) - !$acc update device( dir_idx, dir_flg) + $:UPDATE(device=["is1","is2","is3","dj"]) + $:UPDATE(device=["dir_idx","dir_flg"]) ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then @@ -1420,7 +1422,7 @@ contains ! Determining the indicial shift based on CBC location dj = max(0, cbc_loc) - !$acc update device(dj) + $:UPDATE(device=["dj"]) ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index c0cdc226ea..c8170d39a1 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -91,7 +91,7 @@ contains iembed(1) = 0 oembed(1) = 0 !$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq) - !$acc update device(real_size, cmplx_size, x_size, sys_size, batch_size) + $:UPDATE(device=["real_size","cmplx_size","x_size","sys_size","batch_size"]) #else ! Allocate input and output DFT data sizes fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) @@ -172,7 +172,7 @@ contains #endif !$acc end host_data Nfq = 3 - !$acc update device(Nfq) + $:UPDATE(device=["Nfq"]) $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size @@ -232,7 +232,7 @@ contains !$acc end host_data Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - !$acc update device(Nfq) + $:UPDATE(device=["Nfq"]) $:PARALLEL_LOOP(collapse=3) do k = 1, sys_size diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 348f9d542c..b93f023d05 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -803,10 +803,10 @@ contains else weno_num_stencils = weno_polyn end if - !$acc update device(weno_polyn) - !$acc update device(weno_num_stencils) - !$acc update device(nb) - !$acc update device(num_dims, num_vels, num_fluids) + $:UPDATE(device=["weno_polyn"]) + $:UPDATE(device=["weno_num_stencils"]) + $:UPDATE(device=["nb"]) + $:UPDATE(device=["num_dims","num_vels","num_fluids"]) #:endif ! Initializing the number of fluids for which viscous effects will @@ -1042,7 +1042,7 @@ contains if (Re_size(1) > 0._wp) shear_stress = .true. if (Re_size(2) > 0._wp) bulk_stress = .true. - !$acc update device(Re_size, viscous, shear_stress, bulk_stress) + $:UPDATE(device=["Re_size","viscous","shear_stress","bulk_stress"]) ! Bookkeeping the indexes of any viscous fluids and any pairs of ! fluids whose interface will support effects of surface tension @@ -1098,7 +1098,7 @@ contains ! y-dir: flip tau_xy and tau_yz ! z-dir: flip tau_xz and tau_yz end if - !$acc update device(shear_num, shear_indices, shear_BC_flip_num, shear_BC_flip_indices) + $:UPDATE(device=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) end if if (hyperelasticity) then @@ -1165,7 +1165,7 @@ contains ! cell-boundary values or otherwise, the unaltered left and right, ! WENO-reconstructed, cell-boundary values wa_flg = 0._wp; if (weno_avg) wa_flg = 1._wp - !$acc update device(wa_flg) + $:UPDATE(device=["wa_flg"]) ! Resort to default WENO-JS if no other WENO scheme is selected #:if not MFC_CASE_OPTIMIZATION @@ -1175,7 +1175,7 @@ contains if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) Np = 0 - !$acc update device(Re_size) + $:UPDATE(device=["Re_size"]) if (elasticity) then fd_number = max(1, fd_order/2) @@ -1193,7 +1193,7 @@ contains idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & num_dims) - !$acc update device(idwint, idwbuff) + $:UPDATE(device=["idwint", "idwbuff"]) ! Configuring Coordinate Direction Indexes if (bubbles_euler) then @@ -1203,7 +1203,7 @@ contains & idwbuff(3)%beg:idwbuff(3)%end)) end if - !$acc update device(fd_order,fd_number) + $:UPDATE(device=["fd_order", "fd_number"]) if (cyl_coord .neqv. .true.) then ! Cartesian grid grid_geometry = 1 @@ -1230,23 +1230,32 @@ contains chemxb = species_idx%beg chemxe = species_idx%end - !$acc update device(momxb, momxe, advxb, advxe, contxb, contxe, bubxb, bubxe, intxb, intxe, sys_size, buff_size, E_idx, alf_idx, n_idx, adv_n, adap_dt, pi_fac, strxb, strxe, chemxb, chemxe, c_idx) - !$acc update device(b_size, xibeg, xiend, tensor_size) + $:UPDATE(device=["momxb","momxe","advxb","advxe","contxb","contxe", & + & "bubxb","bubxe","intxb","intxe","sys_size","buff_size","E_idx", & + & "alf_idx","n_idx","adv_n","adap_dt","pi_fac","strxb","strxe", & + & "chemxb","chemxe","c_idx"]) + $:UPDATE(device=["b_size","xibeg","xiend","tensor_size"]) - !$acc update device(species_idx) - !$acc update device(cfl_target, m, n, p) + $:UPDATE(device=["species_idx"]) + $:UPDATE(device=["cfl_target","m","n","p"]) - !$acc update device(alt_soundspeed, acoustic_source, num_source) - !$acc update device(dt, sys_size, buff_size, pref, rhoref, gamma_idx, pi_inf_idx, E_idx, alf_idx, stress_idx, mpp_lim, bubbles_euler, hypoelasticity, alt_soundspeed, avg_state, num_fluids, model_eqns, num_dims, num_vels, mixture_err, grid_geometry, cyl_coord, mp_weno, weno_eps, teno_CT, hyperelasticity, hyper_model, elasticity, xi_idx, B_idx, low_Mach) + $:UPDATE(device=["alt_soundspeed","acoustic_source","num_source"]) + $:UPDATE(device=["dt","sys_size","buff_size","pref","rhoref", & + & "gamma_idx","pi_inf_idx","E_idx","alf_idx","stress_idx", & + & "mpp_lim","bubbles_euler","hypoelasticity","alt_soundspeed", & + & "avg_state","num_fluids","model_eqns","num_dims","num_vels", & + & "mixture_err","grid_geometry","cyl_coord","mp_weno","weno_eps", & + & "teno_CT","hyperelasticity","hyper_model","elasticity","xi_idx", & + & "B_idx","low_Mach"]) - !$acc update device(Bx0, powell) + $:UPDATE(device=["Bx0", "powell"]) - !$acc update device(cont_damage, tau_star, cont_damage_s, alpha_bar) + $:UPDATE(device=["cont_damage","tau_star","cont_damage_s","alpha_bar"]) #:if not MFC_CASE_OPTIMIZATION - !$acc update device(wenojs, mapped_weno, wenoz, teno) - !$acc update device(wenoz_q) - !$acc update device(mhd, relativity) + $:UPDATE(device=["wenojs","mapped_weno","wenoz","teno"]) + $:UPDATE(device=["wenoz_q"]) + $:UPDATE(device=["mhd", "relativity"]) #:endif !$acc enter data copyin(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma) diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 11091627e8..efe984ef05 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -60,7 +60,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - !$acc update device(Gs) + $:UPDATE(device=["Gs"]) @:ALLOCATE(fd_coeff_x(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -73,16 +73,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_x) + $:UPDATE(device=["fd_coeff_x"]) if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_y) + $:UPDATE(device=["fd_coeff_y"]) end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_z) + $:UPDATE(device=["fd_coeff_z"]) end if end subroutine s_initialize_hyperelastic_module diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 430b69d5e5..e954eebbc8 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -56,7 +56,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - !$acc update device(Gs) + $:UPDATE(device=["Gs"]) @:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -69,16 +69,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_x_h) + $:UPDATE(device=["fd_coeff_x_h"]) if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_y_h) + $:UPDATE(device=["fd_coeff_y_h"]) end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, & fd_number, fd_order) - !$acc update device(fd_coeff_z_h) + $:UPDATE(device=["fd_coeff_z_h"]) end if end subroutine s_initialize_hypoelastic_module diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index a3538b2a7e..cd09f74d1f 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -81,9 +81,9 @@ contains integer :: i, j, k - !$acc update device(ib_markers%sf) - !$acc update device(levelset%sf) - !$acc update device(levelset_norm%sf) + $:UPDATE(device=["ib_markers%sf"]) + $:UPDATE(device=["levelset%sf"]) + $:UPDATE(device=["levelset_norm%sf"]) ! Get neighboring IB variables from other processors call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) @@ -92,20 +92,20 @@ contains call s_find_num_ghost_points(num_gps, num_inner_gps) - !$acc update device(num_gps, num_inner_gps) + $:UPDATE(device=["num_gps", "num_inner_gps"]) @:ALLOCATE(ghost_points(1:num_gps)) @:ALLOCATE(inner_points(1:num_inner_gps)) !$acc enter data copyin(ghost_points, inner_points) call s_find_ghost_points(ghost_points, inner_points) - !$acc update device(ghost_points, inner_points) + $:UPDATE(device=["ghost_points", "inner_points"]) call s_compute_image_points(ghost_points, levelset, levelset_norm) - !$acc update device(ghost_points) + $:UPDATE(device=["ghost_points"]) call s_compute_interpolation_coeffs(ghost_points) - !$acc update device(ghost_points) + $:UPDATE(device=["ghost_points"]) end subroutine s_ibm_setup diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index a32412398c..6a6821e602 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -53,12 +53,12 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, fd_number, fd_order) - !$acc update device(fd_coeff_x_h) + $:UPDATE(device=["fd_coeff_x_h"]) call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, fd_number, fd_order) - !$acc update device(fd_coeff_y_h) + $:UPDATE(device=["fd_coeff_y_h"]) if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, fd_number, fd_order) - !$acc update device(fd_coeff_z_h) + $:UPDATE(device=["fd_coeff_z_h"]) end if end subroutine s_initialize_mhd_powell_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f7c05cd07f..1eeec98361 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -909,7 +909,7 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if #endif @@ -1038,7 +1038,7 @@ contains end if if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if ! Unpacking buffer received from bc_x%end @@ -1175,7 +1175,7 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if #endif @@ -1311,7 +1311,7 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if #endif @@ -1450,7 +1450,7 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if #endif @@ -1586,7 +1586,7 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + $:UPDATE(device=["ib_buff_recv"]) end if #endif diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 863324b71d..912ab99e13 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -59,7 +59,7 @@ contains end if !$acc enter data copyin(nterms) - !$acc update device(nterms) + $:UPDATE(device=["nterms"]) #:endif @@ -393,7 +393,7 @@ contains end do end if - !$acc update device(momrhs) + $:UPDATE(device=["momrhs"]) @:ALLOCATE(bubrs(1:nb)) @:ALLOCATE(bubmoms(1:nb, 1:nmom)) @@ -401,14 +401,14 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - !$acc update device(bubrs) + $:UPDATE(device=["bubrs"]) do j = 1, nmom do i = 1, nb bubmoms(i, j) = bub_idx%moms(i, j) end do end do - !$acc update device(bubmoms) + $:UPDATE(device=["bubmoms"]) end subroutine s_initialize_qbmm_module @@ -838,7 +838,7 @@ contains is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz - !$acc update device(is1_qbmm, is2_qbmm, is3_qbmm) + $:UPDATE(device=["is1_qbmm","is2_qbmm","is3_qbmm"]) $:PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & "abscY", "wght_pb", "wght_mv", "wght_ht", "coeff", "ht", "r", "q", & diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 54899dc0fa..cbc50298ba 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -180,7 +180,7 @@ contains integer :: num_eqns_after_adv !$acc enter data copyin(idwbuff, idwbuff) - !$acc update device(idwbuff, idwbuff) + $:UPDATE(device=["idwbuff", "idwbuff"]) @:ALLOCATE(q_cons_qp%vf(1:sys_size)) @:ALLOCATE(q_prim_qp%vf(1:sys_size)) @@ -578,7 +578,7 @@ contains gamma_min(i) = 1._wp/fluid_pp(i)%gamma + 1._wp pres_inf(i) = fluid_pp(i)%pi_inf/(1._wp + fluid_pp(i)%gamma) end do - !$acc update device(gamma_min, pres_inf) + $:UPDATE(device=["gamma_min", "pres_inf"]) if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) @@ -590,7 +590,7 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - !$acc update device(Res, Re_idx, Re_size) + $:UPDATE(device=["Res","Re_idx","Re_size"]) end if $:PARALLEL_LOOP(collapse=4) @@ -2005,7 +2005,7 @@ contains end if - !$acc update device(is1, is2, is3, iv) + $:UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then !$acc parallel loop collapse(4) default(present) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index a593a36c71..1f52925f00 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -3335,7 +3335,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - !$acc update device(Gs) + $:UPDATE(device=["Gs"]) if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) @@ -3347,7 +3347,7 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - !$acc update device(Res, Re_idx, Re_size) + $:UPDATE(device=["Res","Re_idx","Re_size"]) end if !$acc enter data copyin(is1, is2, is3, isx, isy, isz) @@ -3496,7 +3496,7 @@ contains dir_idx = (/3, 1, 2/); dir_flg = (/0._wp, 0._wp, 1._wp/) end if - !$acc update device(is1, is2, is3) + $:UPDATE(device=["is1","is2","is3"]) if (elasticity) then if (norm_dir == 1) then @@ -3509,8 +3509,10 @@ contains end if isx = ix; isy = iy; isz = iz - !$acc update device(isx, isy, isz) ! for stuff in the same module - !$acc update device(dir_idx, dir_flg, dir_idx_tau) ! for stuff in different modules + ! for stuff in the same module + $:UPDATE(device=["isx","isy","isz"]) + ! for stuff in different modules + $:UPDATE(device=["dir_idx","dir_flg","dir_idx_tau"]) ! Population of Buffers in x-direction if (norm_dir == 1) then diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 49f1a5e7d7..4f51c59898 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -275,17 +275,17 @@ contains bc_type(1, -1)%sf(:, :, :) = bc_x%beg bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + $:UPDATE(device=["bc_type(1,-1)%sf","bc_type(1,1)%sf"]) if (n > 0) then bc_type(2, -1)%sf(:, :, :) = bc_y%beg bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + $:UPDATE(device=["bc_type(2,-1)%sf","bc_type(2,1)%sf"]) if (p > 0) then bc_type(3, -1)%sf(:, :, :) = bc_z%beg bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + $:UPDATE(device=["bc_type(3,-1)%sf","bc_type(3,1)%sf"]) end if end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index bf1d429714..859e30afe9 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -3,6 +3,7 @@ !! @brief Contains module m_start_up #:include 'case.fpp' +#:include 'directive_macros.fpp' !> @brief The purpose of the module is primarily to read in the files that !! contain the inputs, the initial condition data and the grid data @@ -1268,12 +1269,12 @@ contains if (cfl_dt) then if ((mytime + dt) >= t_stop) then dt = t_stop - mytime - !$acc update device(dt) + $:UPDATE(device=["dt"]) end if else if ((mytime + dt) >= finaltime) then dt = finaltime - mytime - !$acc update device(dt) + $:UPDATE(device=["dt"]) end if end if @@ -1634,38 +1635,44 @@ contains integer :: i !Update GPU DATA do i = 1, sys_size - !$acc update device(q_cons_ts(1)%vf(i)%sf) + $:UPDATE(device=["q_cons_ts(1)%vf(i)%sf"]) end do if (qbmm .and. .not. polytropic) then - !$acc update device(pb_ts(1)%sf, mv_ts(1)%sf) + $:UPDATE(device=["pb_ts(1)%sf","mv_ts(1)%sf"]) end if if (chemistry) then - !$acc update device(q_T_sf%sf) + $:UPDATE(device=["q_T_sf%sf"]) end if - !$acc update device(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma, adv_n, adap_dt, adap_dt_tol, n_idx, pi_fac, low_Mach) - !$acc update device(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN , mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam) + $:UPDATE(device=["nb","R0ref","Ca","Web","Re_inv","weight","R0","V0", & + & "bubbles_euler","polytropic","polydisperse","qbmm","R0_type", & + & "ptil","bubble_model","thermal","poly_sigma","adv_n","adap_dt", & + & "adap_dt_tol","n_idx","pi_fac","low_Mach"]) + $:UPDATE(device=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv","M_n", & + & "M_v","k_n","k_v","pb0","mass_n0","mass_v0","Pe_T","Re_trans_T", & + & "Re_trans_c","Im_trans_T","Im_trans_c","omegaN","mul0","ss", & + & "gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) - !$acc update device(acoustic_source, num_source) - !$acc update device(sigma, surface_tension) + $:UPDATE(device=["acoustic_source", "num_source"]) + $:UPDATE(device=["sigma", "surface_tension"]) - !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + $:UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) - !$acc update device(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) - !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) - !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + $:UPDATE(device=["bc_x%vb1","bc_x%vb2","bc_x%vb3","bc_x%ve1","bc_x%ve2","bc_x%ve3"]) + $:UPDATE(device=["bc_y%vb1","bc_y%vb2","bc_y%vb3","bc_y%ve1","bc_y%ve2","bc_y%ve3"]) + $:UPDATE(device=["bc_z%vb1","bc_z%vb2","bc_z%vb3","bc_z%ve1","bc_z%ve2","bc_z%ve3"]) - !$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out) - !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) - !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) + $:UPDATE(device=["bc_x%grcbc_in","bc_x%grcbc_out","bc_x%grcbc_vel_out"]) + $:UPDATE(device=["bc_y%grcbc_in","bc_y%grcbc_out","bc_y%grcbc_vel_out"]) + $:UPDATE(device=["bc_z%grcbc_in","bc_z%grcbc_out","bc_z%grcbc_vel_out"]) - !$acc update device(relax, relax_model) + $:UPDATE(device=["relax", "relax_model"]) if (relax) then - !$acc update device(palpha_eps, ptgalpha_eps) + $:UPDATE(device=["palpha_eps", "ptgalpha_eps"]) end if if (ib) then - !$acc update device(ib_markers%sf) + $:UPDATE(device=["ib_markers%sf"]) end if end subroutine s_initialize_gpu_vars diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 4ab6c23a7c..3f11f01443 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -337,7 +337,7 @@ contains end if - !$acc update device(is1, is2, is3, iv) + $:UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then !$acc parallel loop collapse(4) default(present) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b19600c6fd..0196ce2f25 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1006,7 +1006,7 @@ contains call s_mpi_allreduce_min(dt_local, dt) end if - !$acc update device(dt) + $:UPDATE(device=["dt"]) end subroutine s_compute_dt diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 323444c1ee..e37f75d456 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -43,7 +43,7 @@ contains Res_viscous(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - !$acc update device(Res_viscous, Re_idx, Re_size) + $:UPDATE(device=["Res_viscous","Re_idx","Re_size"]) !$acc enter data copyin(is1_viscous, is2_viscous, is3_viscous, iv) end subroutine s_initialize_viscous_module @@ -76,7 +76,7 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - !$acc update device(is1_viscous, is2_viscous, is3_viscous) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -550,7 +550,7 @@ contains iv%beg = mom_idx%beg; iv%end = mom_idx%end - !$acc update device(iv) + $:UPDATE(device=["iv"]) call s_reconstruct_cell_boundary_values_visc( & q_prim_qp%vf(iv%beg:iv%end), & @@ -588,11 +588,11 @@ contains else ! Compute velocity gradient at cell centers using finite differences iv%beg = mom_idx%beg; iv%end = mom_idx%end - !$acc update device(iv) + $:UPDATE(device=["iv"]) is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - !$acc update device(is1_viscous, is2_viscous, is3_viscous) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -1000,7 +1000,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) if (n > 0) then if (p > 0) then @@ -1097,7 +1097,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) if (n > 0) then if (p > 0) then @@ -1200,7 +1200,7 @@ contains is3_viscous = iz iv = iv_in - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) ! First-Order Spatial Derivatives in x-direction if (norm_dir == 1) then @@ -1320,7 +1320,7 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - !$acc update device(is1_viscous, is2_viscous, is3_viscous) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 9c9778a405..0ca1775217 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -625,11 +625,11 @@ contains #:endfor if (weno_dir == 1) then - !$acc update device(poly_coef_cbL_x, poly_coef_cbR_x, d_cbL_x, d_cbR_x, beta_coef_x) + $:UPDATE(device=["poly_coef_cbL_x","poly_coef_cbR_x","d_cbL_x","d_cbR_x","beta_coef_x"]) elseif (weno_dir == 2) then - !$acc update device(poly_coef_cbL_y, poly_coef_cbR_y, d_cbL_y, d_cbR_y, beta_coef_y) + $:UPDATE(device=["poly_coef_cbL_y","poly_coef_cbR_y","d_cbL_y","d_cbR_y","beta_coef_y"]) else - !$acc update device(poly_coef_cbL_z, poly_coef_cbR_z, d_cbL_z, d_cbR_z, beta_coef_z) + $:UPDATE(device=["poly_coef_cbL_z","poly_coef_cbR_z","d_cbL_z","d_cbR_z","beta_coef_z"]) end if ! Nullifying WENO coefficients and cell-boundary locations pointers @@ -664,7 +664,7 @@ contains is2_weno = is2_weno_d is3_weno = is3_weno_d - !$acc update device(is1_weno, is2_weno, is3_weno) + $:UPDATE(device=["is1_weno","is2_weno","is3_weno"]) if (weno_order /= 1) then call s_initialize_weno(v_vf, & @@ -1135,7 +1135,7 @@ contains ! as to reshape the inputted data in the coordinate direction of ! the WENO reconstruction v_size = ubound(v_vf, 1) - !$acc update device(v_size) + $:UPDATE(device=["v_size"]) if (weno_dir == 1) then $:PARALLEL_LOOP(collapse=4) From 9814a90ffa6e56ca2eef1925403ab2f24a626afe Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 13:58:36 -0400 Subject: [PATCH 20/75] Added update host macros --- src/common/m_mpi_common.fpp | 4 ++-- src/simulation/m_bubbles_EL.fpp | 16 +++++++++------- src/simulation/m_data_output.fpp | 6 +++--- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 24 ++++++++++++------------ src/simulation/m_start_up.fpp | 14 +++++++------- src/simulation/m_time_steppers.fpp | 4 ++-- 7 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index c580281ef2..10398001fa 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -860,7 +860,7 @@ $:UPDATE(device=["v_size"]) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) + $:UPDATE(host=["buff_send"]) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -1190,7 +1190,7 @@ $:UPDATE(device=["v_size"]) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) + $:UPDATE(host=["buff_send"]) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 7f9d63c994..14ecf35355 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -251,9 +251,11 @@ contains $:UPDATE(device=["bubbles_lagrange", "lag_params"]) - !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & - !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs) + $:UPDATE(device=["lag_id","bub_R0","Rmax_stats","Rmin_stats","gas_mg", & + & "gas_betaT","gas_betaC","bub_dphidt","gas_p","gas_mv", & + & "intfc_rad","intfc_vel","mtn_pos","mtn_posPrev","mtn_vel", & + & "mtn_s","intfc_draddt","intfc_dveldt","gas_dpdt","gas_dmvdt", & + & "mtn_dposdt","mtn_dveldt","nBubs"]) Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) @@ -1044,7 +1046,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if @@ -1078,7 +1080,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if @@ -1125,7 +1127,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if @@ -1623,7 +1625,7 @@ contains write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - !$acc update host(Rmax_glb, Rmin_glb) + $:UPDATE(host=["Rmax_glb","Rmin_glb"]) open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 7ff2756709..e8f4c92f06 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -303,10 +303,10 @@ contains ! Determining local stability criteria extrema at current time-step #ifdef _CRAYFTN - !$acc update host(icfl_sf) + $:UPDATE(host=["icfl_sf"]) if (viscous) then - !$acc update host(vcfl_sf, Rc_sf) + $:UPDATE(host=["vcfl_sf","Rc_sf"]) end if icfl_max_loc = maxval(icfl_sf) @@ -529,7 +529,7 @@ contains if (prim_vars_wrt .or. (n == 0 .and. p == 0)) then call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwint) do i = 1, sys_size - !$acc update host(q_prim_vf(i)%sf(:,:,:)) + $:UPDATE(host=["q_prim_vf(i)%sf(:,:,:)"]) end do ! q_prim_vf(bubxb) stores the value of nb needed in riemann solvers, so replace with true primitive value (=1._wp) if (qbmm) then diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index cd09f74d1f..57ce03c183 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -88,7 +88,7 @@ contains ! Get neighboring IB variables from other processors call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) - !$acc update host(ib_markers%sf) + $:UPDATE(host=["ib_markers%sf"]) call s_find_num_ghost_points(num_gps, num_inner_gps) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 1eeec98361..37882ec1ab 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -835,7 +835,7 @@ contains else #endif - !$acc update host(ib_buff_send, ib_buff_send) + $:UPDATE(host=["ib_buff_send","ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -889,7 +889,7 @@ contains !$acc wait else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -967,7 +967,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) call MPI_SENDRECV( & ib_buff_send(0), & gp_layers*(n + 1)*(p + 1), & @@ -1020,7 +1020,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) call MPI_SENDRECV( & ib_buff_send(0), & @@ -1099,7 +1099,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1155,7 +1155,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1235,7 +1235,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1291,7 +1291,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1374,7 +1374,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1430,7 +1430,7 @@ contains else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1510,7 +1510,7 @@ contains !$acc wait else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1566,7 +1566,7 @@ contains !$acc wait else #endif - !$acc update host(ib_buff_send) + $:UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 859e30afe9..208f0524e6 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1298,7 +1298,7 @@ contains if (probe_wrt) then do i = 1, sys_size - !$acc update host(q_cons_ts(1)%vf(i)%sf) + $:UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) end do end if @@ -1404,7 +1404,7 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") do i = 1, sys_size - !$acc update host(q_cons_ts(1)%vf(i)%sf) + $:UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) do l = 0, p do k = 0, n do j = 0, m @@ -1418,8 +1418,8 @@ contains end do if (qbmm .and. .not. polytropic) then - !$acc update host(pb_ts(1)%sf) - !$acc update host(mv_ts(1)%sf) + $:UPDATE(host=["pb_ts(1)%sf"]) + $:UPDATE(host=["mv_ts(1)%sf"]) end if if (cfl_dt) then @@ -1429,16 +1429,16 @@ contains end if if (bubbles_lagrange) then - !$acc update host(intfc_rad) + $:UPDATE(host=["intfc_rad"]) do i = 1, nBubs if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") end if end do - !$acc update host(q_beta%vf(1)%sf) + $:UPDATE(host=["q_beta%vf(1)%sf"]) call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1)) - !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_vel) + $:UPDATE(host=["Rmax_stats","Rmin_stats","gas_p","gas_mv","intfc_vel"]) call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 0196ce2f25..15fd95e52c 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -948,7 +948,7 @@ contains if (stage == 3) then if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if call s_write_void_evol(mytime) @@ -1051,7 +1051,7 @@ contains integer :: i !< Generic loop iterator do i = 1, sys_size - !$acc update host(q_prim_vf(i)%sf) + $:UPDATE(host=["q_prim_vf(i)%sf"]) end do if (t_step == t_step_start) then From 2bcfa220b74e9915b5f7cb60c9e6cc632774d853 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 16:50:43 -0400 Subject: [PATCH 21/75] Replaced rest of parallel loop directives --- src/common/m_mpi_common.fpp | 48 ++++++++++----------- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 25 +++++------ src/simulation/m_bubbles_EL_kernels.fpp | 4 +- src/simulation/m_hypoelastic.fpp | 2 +- src/simulation/m_ibm.fpp | 10 ++++- src/simulation/m_mpi_proxy.fpp | 36 ++++++++-------- src/simulation/m_qbmm.fpp | 8 ++-- src/simulation/m_rhs.fpp | 56 ++++++++++++------------- src/simulation/m_surface_tension.fpp | 6 +-- src/simulation/m_viscous.fpp | 10 ++--- src/simulation/m_weno.fpp | 14 +++---- 12 files changed, 114 insertions(+), 107 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 10398001fa..19bd461d5c 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -697,7 +697,7 @@ $:UPDATE(device=["v_size"]) #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -711,7 +711,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -726,7 +726,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -743,7 +743,7 @@ $:UPDATE(device=["v_size"]) end if #endif #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, p do k = 0, buff_size - 1 @@ -759,7 +759,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -775,7 +775,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -793,7 +793,7 @@ $:UPDATE(device=["v_size"]) end if #endif #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -809,7 +809,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -825,7 +825,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -895,7 +895,7 @@ $:UPDATE(device=["v_size"]) #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -916,7 +916,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -931,7 +931,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -948,7 +948,7 @@ $:UPDATE(device=["v_size"]) end if #endif #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, p do k = -buff_size, -1 @@ -970,7 +970,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -986,7 +986,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -1005,7 +1005,7 @@ $:UPDATE(device=["v_size"]) #endif #:else ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1028,7 +1028,7 @@ $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1045,7 +1045,7 @@ $:UPDATE(device=["v_size"]) end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1135,7 +1135,7 @@ $:UPDATE(device=["v_size"]) #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -1148,7 +1148,7 @@ $:UPDATE(device=["v_size"]) end do #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, p do k = 0, buff_size - 1 @@ -1163,7 +1163,7 @@ $:UPDATE(device=["v_size"]) end do #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -1224,7 +1224,7 @@ $:UPDATE(device=["v_size"]) #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1244,7 +1244,7 @@ $:UPDATE(device=["v_size"]) end do #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, p do k = -buff_size, -1 @@ -1266,7 +1266,7 @@ $:UPDATE(device=["v_size"]) #:else ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = -buff_size, -1 do k = -buff_size, n + buff_size diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index a779c0e5d5..d7186d75a3 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -220,7 +220,7 @@ contains deallocate (phi_rn) - !$acc parallel loop gang vector default(present) private(myalpha, myalpha_rho) + $:PARALLEL_LOOP(private=["myalpha","myalpha_rho"]) do i = 1, num_points j = source_spatials(ai)%coord(1, i) k = source_spatials(ai)%coord(2, i) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 14ecf35355..a1746ddac6 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -530,7 +530,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - !$acc parallel loop gang vector default(present) private(k, cell) + $:PARALLEL_LOOP(private=["k","cell"]) do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -551,8 +551,9 @@ contains ! Radial motion model adap_dt_stop_max = 0 - !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & - !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) + $:PARALLEL_LOOP(private=["k","myalpha_rho","myalpha","Re","cell"], & + & reduction=["adap_dt_stop_max"],reductionOp="MAX", & + & copy=["adap_dt_stop_max"],copyin=["stage"]) do k = 1, nBubs ! Keller-Miksis model @@ -1030,7 +1031,7 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1052,7 +1053,7 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1064,7 +1065,7 @@ contains end do elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp @@ -1088,7 +1089,7 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1100,7 +1101,7 @@ contains end do elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp @@ -1111,7 +1112,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do elseif (stage == 3) then - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) @@ -1197,7 +1198,7 @@ contains integer :: k - !$acc parallel loop gang vector default(present) private(k) + $:PARALLEL_LOOP(private=["k"]) do k = 1, nBubs gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) @@ -1605,8 +1606,8 @@ contains integer :: k - !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) & - !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb) + $:PARALLEL_LOOP(reduction=[["Rmax_glb"], ["Rmin_glb"]], & + & reductionOp=["MAX", "MIN"], copy=["Rmax_glb","Rmin_glb"]) do k = 1, nBubs Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index c173c82953..ae00f42a25 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,7 +56,7 @@ contains real(wp), dimension(3) :: s_coord integer :: l - !$acc parallel loop gang vector default(present) private(l, s_coord, cell) + $:PARALLEL_LOOP(private=["l","s_coord","cell"]) do l = 1, nBubs volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp @@ -121,7 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - !$acc parallel loop gang vector default(present) private(nodecoord, l, s_coord, cell, center) copyin(smearGrid, smearGridz) + $:PARALLEL_LOOP(private=["nodecoord","l","s_coord","cell","center"], copyin=["smearGrid","smearGridz"]) do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index e954eebbc8..16c3008687 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -400,7 +400,7 @@ contains if (n == 0) then l = 0; q = 0 - !$acc parallel loop collapse(1) gang vector default(present) + $:PARALLEL_LOOP() do k = 0, m rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s end do diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 57ce03c183..f4b83f9598 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -153,7 +153,11 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, vel_g, vel_norm_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, presb_IP, massv_IP, rho, gamma, pi_inf, Re_K, G_K, Gs, gp, innerp, norm, buf, j, k, l, q, coeff) + $:PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & + & "alpha_IP","pres_IP","vel_IP","vel_g","vel_norm_IP","r_IP", & + & "v_IP","pb_IP","mv_IP","nmom_IP","presb_IP","massv_IP","rho", & + & "gamma","pi_inf","Re_K","G_K","Gs","gp","innerp","norm","buf", & + & "j","k","l","q","coeff"]) do i = 1, num_gps gp = ghost_points(i) @@ -297,7 +301,9 @@ contains end do !Correct the state of the inner points in IBs - !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, vel_g, rho, gamma, pi_inf, Re_K, innerp, j, k, l, q) + $:PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & + & "alpha_IP","vel_g","rho","gamma","pi_inf","Re_K","innerp", & + & "j","k","l","q"]) do i = 1, num_inner_gps vel_g = 0._wp diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 37882ec1ab..f271cb696e 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -799,7 +799,7 @@ contains if (bc_x%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -854,7 +854,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -914,7 +914,7 @@ contains #endif ! Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -930,7 +930,7 @@ contains if (bc_x%beg >= 0) then ! PBC at the end and beginning - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) ! Packing buffer to be sent to bc_x%beg do l = 0, p do k = 0, n @@ -984,7 +984,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -1042,7 +1042,7 @@ contains end if ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m + 1, m + gp_layers @@ -1062,7 +1062,7 @@ contains if (bc_y%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1118,7 +1118,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1180,7 +1180,7 @@ contains #endif ! Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -1198,7 +1198,7 @@ contains if (bc_y%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1254,7 +1254,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1316,7 +1316,7 @@ contains #endif ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n + 1, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1336,7 +1336,7 @@ contains if (bc_z%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1393,7 +1393,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1455,7 +1455,7 @@ contains #endif ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1474,7 +1474,7 @@ contains if (bc_z%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1529,7 +1529,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1591,7 +1591,7 @@ contains #endif ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:PARALLEL_LOOP(collapse=3,private=["r"]) do l = p + 1, p + gp_layers do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 912ab99e13..9dc18d1771 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -429,7 +429,7 @@ contains !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb if (.not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(nb_q, nR, nR2, R, R2, nb_dot, nR_dot, nR2_dot, var, AX) + $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) do i = 1, nb do q = 1, nnode do l = 0, p @@ -513,7 +513,7 @@ contains !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb if (.not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(nb_q, nR, nR2, R, R2, nb_dot, nR_dot, nR2_dot, var, AX) + $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) do i = 1, nb do q = 1, nnode do l = 0, p @@ -570,7 +570,7 @@ contains if (.not. polytropic) then if (grid_geometry == 3) then !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - !$acc parallel loop collapse(5) gang vector default(present) private(nb_q, nR, nR2, R, R2, nb_dot, nR_dot, nR2_dot, var, AX) + $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) do i = 1, nb do q = 1, nnode do l = 0, p @@ -621,7 +621,7 @@ contains end do else !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - !$acc parallel loop collapse(5) gang vector default(present) private(nb_q, nR, nR2, R, R2, nb_dot, nR_dot, nR2_dot, var, AX) + $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) do i = 1, nb do q = 1, nnode do l = 0, p diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index cbc50298ba..c31095f6ca 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1010,7 +1010,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do q_loop = 0, p do l_loop = 0, n @@ -1025,8 +1025,8 @@ contains end do if (model_eqns == 3) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + & "pressure_val","flux_face1","flux_face2"]) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1055,7 +1055,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do l = 0, p do k = 0, n @@ -1070,8 +1070,8 @@ contains end do if (model_eqns == 3) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + & "pressure_val","flux_face1","flux_face2"]) do l = 0, p do k = 0, n do q = 0, m @@ -1096,7 +1096,7 @@ contains end if if (cyl_coord) then - !$acc parallel loop collapse(4) gang vector default(present) private(flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) do j = 1, sys_size do l = 0, p do k = 0, n @@ -1122,8 +1122,8 @@ contains end if if (grid_geometry == 3) then ! Cylindrical Coordinates - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(inv_ds, velocity_val, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","velocity_val", & + & "flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p do q = 0, n @@ -1138,7 +1138,7 @@ contains end do end do end do - !$acc parallel loop collapse(4) gang vector default(present) private(flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p do q = 0, n @@ -1152,7 +1152,7 @@ contains end do end do else ! Cartesian Coordinates - !$acc parallel loop collapse(4) gang vector default(present) private(inv_ds, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p do q = 0, n @@ -1168,8 +1168,8 @@ contains end if if (model_eqns == 3) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(inv_ds, advected_qty_val, pressure_val, flux_face1, flux_face2) + $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + & "pressure_val","flux_face1","flux_face2"]) do k = 0, p do q = 0, n do l = 0, m @@ -1213,8 +1213,8 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do q_idx = 0, p ! z_extent do l_idx = 0, n ! y_extent @@ -1262,8 +1262,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) @@ -1280,8 +1280,8 @@ contains case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do l_idx = 0, p ! z_extent do k_idx = 0, n ! y_extent @@ -1338,8 +1338,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) @@ -1361,8 +1361,8 @@ contains end if if (use_standard_riemann) then - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do k_idx = 0, p ! z_extent do q_idx = 0, n ! y_extent @@ -1411,8 +1411,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - !$acc parallel loop collapse(4) gang vector default(present) & - !$acc private(local_inv_ds, local_term_coeff, local_flux1, local_flux2) + $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) @@ -2008,7 +2008,7 @@ contains $:UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -2021,7 +2021,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 2) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -2034,7 +2034,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 3) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 3f11f01443..97c987327d 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -340,7 +340,7 @@ contains $:UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -353,7 +353,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 2) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -366,7 +366,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 3) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index e37f75d456..07815a9c46 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1000,7 +1000,7 @@ contains end if - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) if (n > 0) then if (p > 0) then @@ -1097,7 +1097,7 @@ contains end if - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) if (n > 0) then if (p > 0) then @@ -1200,7 +1200,7 @@ contains is3_viscous = iz iv = iv_in - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) ! First-Order Spatial Derivatives in x-direction if (norm_dir == 1) then @@ -1334,7 +1334,7 @@ contains end do if (n > 0) then - !$acc parallel loop collapse(3) gang vector + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1347,7 +1347,7 @@ contains end if if (p > 0) then - !$acc parallel loop collapse(3) gang vector + $:PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 0ca1775217..1db77c8b45 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -673,7 +673,7 @@ contains if (weno_order == 1) then if (weno_dir == 1) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -686,7 +686,7 @@ contains end do !$acc end parallel loop else if (weno_dir == 2) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -699,7 +699,7 @@ contains end do !$acc end parallel loop else if (weno_dir == 3) then - !$acc parallel loop collapse(4) default(present) + $:PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -715,7 +715,7 @@ contains elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - !$acc parallel loop collapse(4) gang vector default(present) private(beta,dvd,poly,omega,alpha,tau) + $:PARALLEL_LOOP(collapse=4,private=["beta","dvd","poly","omega","alpha","tau"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -794,7 +794,7 @@ contains elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - !$acc parallel loop vector gang collapse(3) default(present) private(dvd, poly, beta, alpha, omega, tau, delta) + $:PARALLEL_LOOP(collapse=3,private=["dvd","poly","beta","alpha","omega","tau","delta"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -914,7 +914,7 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - !$acc parallel loop vector gang collapse(3) default(present) private(poly, beta, alpha, omega, tau, delta, dvd, v) + $:PARALLEL_LOOP(collapse=3,private=["poly","beta","alpha","omega","tau","delta","dvd","v"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1269,7 +1269,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - !$acc parallel loop gang vector collapse (4) default(present) private(d) + $:PARALLEL_LOOP(collapse=4,private=["d"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end From b4afb9f552a21c0724ed133de725b69d70835f31 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 16:55:39 -0400 Subject: [PATCH 22/75] Ran formatter --- src/common/m_mpi_common.fpp | 2 +- src/common/m_variables_conversion.fpp | 2 +- src/simulation/m_global_parameters.fpp | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 19bd461d5c..2d1b079467 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -643,7 +643,7 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -$:UPDATE(device=["v_size"]) + $:UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 7c49767723..796dcb5003 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -653,7 +653,7 @@ contains qvs(i) = fluid_pp(i)%qv qvps(i) = fluid_pp(i)%qvp end do -$:UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) + $:UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) #ifdef MFC_SIMULATION diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index b93f023d05..e2e14286d7 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -311,7 +311,6 @@ module m_global_parameters $:DECLARE(create=["buff_size"]) - integer :: shear_num !! Number of shear stress components integer, dimension(3) :: shear_indices !< !! Indices of the stress components that represent shear stress From 764c71db495e37e7b0f23e9b9af62db2ef72ab5b Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 16:05:25 -0400 Subject: [PATCH 23/75] Small refactor of QBMM subroutines (#856) --- src/simulation/m_qbmm.fpp | 641 ++++++++++++++------------------------ 1 file changed, 241 insertions(+), 400 deletions(-) diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 9dc18d1771..c5338bfed0 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -422,258 +422,143 @@ contains real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv integer :: i, j, k, l, q - real(wp) :: nb_q, nb_dot, R, R2, nR, nR2, nR_dot, nR2_dot, var, AX + logical :: is_axisym - if (idir == 1) then - - !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - if (.not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - if (R2 - R**2._wp > 0._wp) then - var = R2 - R**2._wp - else - var = verysmall - end if - - if (q <= 2) then - AX = R - sqrt(var) - else - AX = R + sqrt(var) - end if + select case (idir) + case (1) + is_axisym = .false. + case (2) + is_axisym = .false. + case (3) + is_axisym = (grid_geometry == 3) + end select + if (.not. polytropic) then + $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) + do i = 1, nb + do q = 1, nnode + do l = 0, p + do k = 0, n + do j = 0, m + nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + var = max(R2 - R**2._wp, verysmall) + if (q <= 2) then + AX = R - sqrt(var) + else + AX = R + sqrt(var) + end if + select case (idir) + case (1) nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - - if (q <= 2) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - - end do - end do - end do - end do - end do - end if - - $:PARALLEL_LOOP(collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) - j = bubxb - $:LOOP() - do k = 1, nb - rhs_vf(j)%sf(i, q, l) = & - rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) - rhs_vf(j + 1)%sf(i, q, l) = & - rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) - rhs_vf(j + 2)%sf(i, q, l) = & - rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) - rhs_vf(j + 3)%sf(i, q, l) = & - rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) - rhs_vf(j + 4)%sf(i, q, l) = & - rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) - rhs_vf(j + 5)%sf(i, q, l) = & - rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) - j = j + 6 - end do - - end do - end do - end do - - elseif (idir == 2) then - - !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - if (.not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - if (R2 - R**2._wp > 0._wp) then - var = R2 - R**2._wp - else - var = verysmall - end if - - if (q <= 2) then - AX = R - sqrt(var) - else - AX = R + sqrt(var) - end if - + case (2) nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - - if (q <= 2) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - - end do - end do - end do - end do - end do - end if - - elseif (idir == 3) then - - if (.not. polytropic) then - if (grid_geometry == 3) then - !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - if (R2 - R**2._wp > 0._wp) then - var = R2 - R**2._wp - else - var = verysmall - end if - - if (q <= 2) then - AX = R - sqrt(var) - else - AX = R + sqrt(var) - end if - + case (3) + if (is_axisym) then nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - - if (q <= 2) then + else + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + end if + end select + if (q <= 2) then + select case (idir) + case (1) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) end if - end do - end do - end do - end do - end do - else - !Non-polytropic qbmm needs to account for change in bubble radius due to a change in nb - $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - if (R2 - R**2._wp > 0._wp) then - var = R2 - R**2._wp - else - var = verysmall - end if - - if (q <= 2) then - AX = R - sqrt(var) - else - AX = R + sqrt(var) - end if - - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - - if (q <= 2) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + end select + else + select case (idir) + case (1) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) end if - - end do - end do + end select + end if end do end do end do - end if - end if + end do + end do + end if + ! The following block is not repeated and is left as is + if (idir == 1) then + !$acc parallel loop collapse(3) gang vector default(present) + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) + j = bubxb + !$acc loop seq + do k = 1, nb + rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) + rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) + rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) + rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) + rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) + rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) + j = j + 6 + end do + end do + end do + end do end if end subroutine s_compute_qbmm_rhs @@ -760,7 +645,7 @@ contains $:ROUTINE() #endif - real(wp), intent(inout) :: pres, rho, c + real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs integer :: i1, i2 @@ -831,13 +716,9 @@ contains real(wp), dimension(nterms, 0:2, 0:2) :: coeff real(wp) :: pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T real(wp) :: n_tait, B_tait - - integer :: id1, id2, id3 - integer :: i1, i2 - integer :: j, q, r + integer :: id1, id2, id3, i1, i2, j, q, r is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz - $:UPDATE(device=["is1_qbmm","is2_qbmm","is3_qbmm"]) $:PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & @@ -851,73 +732,52 @@ contains alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) pres = q_prim_vf(E_idx)%sf(id1, id2, id3) rho = q_prim_vf(contxb)%sf(id1, id2, id3) + if (bubble_model == 2) then - n_tait = gammas(1) - n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' + n_tait = 1._wp/gammas(1) + 1._wp B_tait = pi_infs(1)*(n_tait - 1)/n_tait c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) - - if (c > 0._wp) then - c = sqrt(c) - else - c = sgm_eps - end if + c = merge(sqrt(c), sgm_eps, c > 0._wp) end if - if (polytropic) then - call s_coeff(pres, rho, c, coeff) - else - call s_coeff_nonpoly(pres, rho, c, coeff) - end if + call s_coeff_selector(pres, rho, c, coeff, polytropic) - ! SHB: Manually adjusted pressure here for no-coupling case - ! pres = 1._wp/0.3_wp if (alf > small_alf) then - nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) - $:LOOP() do q = 1, nb - !Initialize moment set for each R0 bin + ! Gather moments for this bubble bin $:LOOP() do r = 2, nmom moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) end do - moms(1) = 1._wp - call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) if (polytropic) then - !Account for bubble pressure pb0 at each R0 bin $:LOOP() do j = 1, nnode wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do else - !Account for bubble pressure, mass transfer rate and heat transfer rate in wght_pb, wght_mv and wght_ht using Preston model $:LOOP() do j = 1, nnode chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) - k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) & - + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) + k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) rho_mw = pv/(chi_vw*R_v*Tw) rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) - - T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3 & - *(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) + T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) grad_T = -Re_trans_T(q)*(T_bar - Tw) ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) - wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) wght_ht(j, q) = wght(j, q)*ht(j, q) end do end if - !Compute change in moments due to bubble dynamics + ! Compute change in moments due to bubble dynamics r = 1 $:LOOP() do i2 = 0, 2 @@ -927,67 +787,45 @@ contains momsum = 0._wp $:LOOP() do j = 1, nterms - ! Account for term with pb in Rayleigh Plesset equation - if (bubble_model == 3 .and. j == 3) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) & - *f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - ! Account for terms with pb in Keller-Miksis equation - else if (bubble_model == 2 .and. ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26))) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) & - *f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - ! Account for terms with mass transfer rate in Keller-Miksis equation - else if (bubble_model == 2 .and. (j >= 27 .and. j <= 29) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) & - *f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) - ! Account for terms with heat transfer rate in Keller-Miksis equation - else if (bubble_model == 2 .and. (j >= 30 .and. j <= 32) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) & - *f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q)) & - *f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - + select case (bubble_model) + case (3) + if (j == 3) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + case (2) + if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + end select end do - moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum msum(r) = momsum r = r + 1 - end if end do end do - ! Compute change in pb and mv for non-polytroic model + ! Compute change in pb and mv for non-polytropic model if (.not. polytropic) then $:LOOP() do j = 1, nnode - ! Compute Rdot (drdt) at quadrature node in the ODE for pb (note this is not the same as bubble variable Rdot) drdt = msum(2) - if (moms(4) - moms(2)**2._wp > 0._wp) then - if (j == 1 .or. j == 2) then - drdt2 = -1._wp/(2._wp*sqrt(moms(4) - moms(2)**2._wp)) - else - drdt2 = 1._wp/(2._wp*sqrt(moms(4) - moms(2)**2._wp)) - end if - else - ! Edge case where variance < 0 - if (j == 1 .or. j == 2) then - drdt2 = -1._wp/(2._wp*sqrt(verysmall)) - else - drdt2 = 1._wp/(2._wp*sqrt(verysmall)) - end if - end if - + drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) drdt = drdt + drdt2 - rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) end do - end if end do @@ -996,19 +834,14 @@ contains momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) if (abs(gam - 1._wp) <= 1.e-4_wp) then - ! Gam \approx 1, don't risk imaginary quadrature momsp(4)%sf(id1, id2, id3) = 1._wp else - !Special moment with bubble pressure pb if (polytropic) then - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) & - - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) else - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) & - - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) end if end if - else $:LOOP() do q = 1, nb @@ -1020,133 +853,141 @@ contains end do end do end do - momsp(1)%sf(id1, id2, id3) = 0._wp momsp(2)%sf(id1, id2, id3) = 0._wp momsp(3)%sf(id1, id2, id3) = 0._wp momsp(4)%sf(id1, id2, id3) = 0._wp - end if - end do end do end do - end subroutine s_mom_inv + contains + ! Helper to select the correct coefficient routine + subroutine s_coeff_selector(pres, rho, c, coeff, polytropic) +#ifdef _CRAYFTN + !DIR$ INLINEALWAYS s_chyqmom +#else + !$acc routine seq +#endif + real(wp), intent(in) :: pres, rho, c + real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff + logical, intent(in) :: polytropic + if (polytropic) then + call s_coeff(pres, rho, c, coeff) + else + call s_coeff_nonpoly(pres, rho, c, coeff) + end if + end subroutine s_coeff_selector - pure subroutine s_chyqmom(momin, wght, abscX, abscY) + pure subroutine s_chyqmom(momin, wght, abscX, abscY) #ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_chyqmom + !DIR$ INLINEALWAYS s_chyqmom #else - $:ROUTINE() + $:ROUTINE() #endif - real(wp), dimension(nmom), intent(in) :: momin - real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY - - real(wp), dimension(0:2, 0:2) :: moms - real(wp), dimension(3) :: M1, M3 - real(wp), dimension(2) :: myrho, myrho3, up, up3, Vf - real(wp) :: bu, bv, d20, d11, d_02, c20, c11, c02 - real(wp) :: mu2avg, mu2, vp21, vp22, rho21, rho22 - - moms(0, 0) = momin(1) - moms(1, 0) = momin(2) - moms(0, 1) = momin(3) - moms(2, 0) = momin(4) - moms(1, 1) = momin(5) - moms(0, 2) = momin(6) - - bu = moms(1, 0)/moms(0, 0) - bv = moms(0, 1)/moms(0, 0) - d20 = moms(2, 0)/moms(0, 0) - d11 = moms(1, 1)/moms(0, 0) - d_02 = moms(0, 2)/moms(0, 0) - - c20 = d20 - bu**2._wp; - c11 = d11 - bu*bv; - c02 = d_02 - bv**2._wp; - M1 = (/1._wp, 0._wp, c20/) - call s_hyqmom(myrho, up, M1) - Vf = c11*up/c20 - - mu2avg = c02 - sum(myrho(:)*(Vf(:)**2._wp)) - - mu2avg = maxval((/mu2avg, 0._wp/)) - mu2 = mu2avg - M3 = (/1._wp, 0._wp, mu2/) - call s_hyqmom(myrho3, up3, M3) - - vp21 = up3(1) - vp22 = up3(2) - rho21 = myrho3(1) - rho22 = myrho3(2) - - wght(1) = myrho(1)*rho21 - wght(2) = myrho(1)*rho22 - wght(3) = myrho(2)*rho21 - wght(4) = myrho(2)*rho22 - wght = moms(0, 0)*wght - - abscX(1) = up(1) - abscX(2) = up(1) - abscX(3) = up(2) - abscX(4) = up(2) - abscX = bu + abscX - - abscY(1) = Vf(1) + vp21 - abscY(2) = Vf(1) + vp22 - abscY(3) = Vf(2) + vp21 - abscY(4) = Vf(2) + vp22 - abscY = bv + abscY - - end subroutine s_chyqmom - - pure subroutine s_hyqmom(frho, fup, fmom) + real(wp), dimension(nmom), intent(in) :: momin + real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY + + ! Local variables + real(wp), dimension(0:2, 0:2) :: moms + real(wp), dimension(3) :: M1, M3 + real(wp), dimension(2) :: myrho, myrho3, up, up3, Vf + real(wp) :: bu, bv, d20, d11, d_02, c20, c11, c02 + real(wp) :: mu2, vp21, vp22, rho21, rho22 + + ! Assign moments to 2D array for clarity + moms(0, 0) = momin(1) + moms(1, 0) = momin(2) + moms(0, 1) = momin(3) + moms(2, 0) = momin(4) + moms(1, 1) = momin(5) + moms(0, 2) = momin(6) + + ! Compute means and central moments + bu = moms(1, 0)/moms(0, 0) + bv = moms(0, 1)/moms(0, 0) + d20 = moms(2, 0)/moms(0, 0) + d11 = moms(1, 1)/moms(0, 0) + d_02 = moms(0, 2)/moms(0, 0) + + c20 = d20 - bu**2._wp + c11 = d11 - bu*bv + c02 = d_02 - bv**2._wp + + ! First 1D quadrature (X direction) + M1 = (/1._wp, 0._wp, c20/) + call s_hyqmom(myrho, up, M1) + Vf = c11*up/c20 + + ! Second 1D quadrature (Y direction, conditional on X) + mu2 = max(0._wp, c02 - sum(myrho*(Vf**2._wp))) + M3 = (/1._wp, 0._wp, mu2/) + call s_hyqmom(myrho3, up3, M3) + + ! Assign roots and weights for 2D quadrature + vp21 = up3(1) + vp22 = up3(2) + rho21 = myrho3(1) + rho22 = myrho3(2) + + ! Compute weights (vectorized) + wght = moms(0, 0)*[myrho(1)*rho21, myrho(1)*rho22, myrho(2)*rho21, myrho(2)*rho22] + + ! Compute abscissas (vectorized) + abscX = bu + [up(1), up(1), up(2), up(2)] + abscY = bv + [Vf(1) + vp21, Vf(1) + vp22, Vf(2) + vp21, Vf(2) + vp22] + + end subroutine s_chyqmom + + pure subroutine s_hyqmom(frho, fup, fmom) #ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_hyqmom + !DIR$ INLINEALWAYS s_hyqmom #else - $:ROUTINE() + $:ROUTINE() #endif - real(wp), dimension(2), intent(inout) :: frho, fup - real(wp), dimension(3), intent(in) :: fmom + real(wp), dimension(2), intent(inout) :: frho, fup + real(wp), dimension(3), intent(in) :: fmom - real(wp) :: bu, d2, c2 + real(wp) :: bu, d2, c2 - bu = fmom(2)/fmom(1) - d2 = fmom(3)/fmom(1) - c2 = d2 - bu**2._wp - frho(1) = fmom(1)/2._wp; - frho(2) = fmom(1)/2._wp; - c2 = maxval((/c2, verysmall/)) - fup(1) = bu - sqrt(c2) - fup(2) = bu + sqrt(c2) + bu = fmom(2)/fmom(1) + d2 = fmom(3)/fmom(1) + c2 = d2 - bu**2._wp + frho(1) = fmom(1)/2._wp; + frho(2) = fmom(1)/2._wp; + c2 = maxval((/c2, verysmall/)) + fup(1) = bu - sqrt(c2) + fup(2) = bu + sqrt(c2) - end subroutine s_hyqmom + end subroutine s_hyqmom - pure function f_quad(abscX, abscY, wght_in, q, r, s) - $:ROUTINE() - real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in - real(wp), intent(in) :: q, r, s + pure function f_quad(abscX, abscY, wght_in, q, r, s) + $:ROUTINE() + real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in + real(wp), intent(in) :: q, r, s - real(wp) :: f_quad_RV, f_quad - integer :: i + real(wp) :: f_quad_RV, f_quad + integer :: i - f_quad = 0._wp - do i = 1, nb - f_quad_RV = sum(wght_in(:, i)*(abscX(:, i)**q)*(abscY(:, i)**r)) - f_quad = f_quad + weight(i)*(R0(i)**s)*f_quad_RV - end do + f_quad = 0._wp + do i = 1, nb + f_quad_RV = sum(wght_in(:, i)*(abscX(:, i)**q)*(abscY(:, i)**r)) + f_quad = f_quad + weight(i)*(R0(i)**s)*f_quad_RV + end do - end function f_quad + end function f_quad - pure function f_quad2D(abscX, abscY, wght_in, pow) - $:ROUTINE() - real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in - real(wp), dimension(3), intent(in) :: pow + pure function f_quad2D(abscX, abscY, wght_in, pow) + $:ROUTINE() + real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in + real(wp), dimension(3), intent(in) :: pow + + real(wp) :: f_quad2D - real(wp) :: f_quad2D + f_quad2D = sum(wght_in(:)*(abscX(:)**pow(1))*(abscY(:)**pow(2))) + end function f_quad2D - f_quad2D = sum(wght_in(:)*(abscX(:)**pow(1))*(abscY(:)**pow(2))) - end function f_quad2D + end subroutine s_mom_inv end module m_qbmm From 2914bed6bb7f2fe0895746494a208fd0463ae3d4 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 4 Jun 2025 21:52:51 -0400 Subject: [PATCH 24/75] Fix packer compare message (#857) --- toolchain/mfc/packer/packer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/toolchain/mfc/packer/packer.py b/toolchain/mfc/packer/packer.py index 98555d8477..0ae6671e45 100644 --- a/toolchain/mfc/packer/packer.py +++ b/toolchain/mfc/packer/packer.py @@ -41,7 +41,10 @@ def packer(): if err is not None: raise MFCException(err) elif ARG("packer") == "compare": - cons.print(f"Comparing [magenta]{os.path.relpath(ARG('input1'))}[/magenta] to [magenta]{os.path.relpath(ARG('input1'))}[/magenta]:") + cons.print( + f"Comparing [magenta]{os.path.relpath(ARG('input1'))}[/magenta] to " + f"[magenta]{os.path.relpath(ARG('input2'))}[/magenta]:" + ) cons.indent() cons.print() From d9656a78bf8a0a0df292c81ae613a67c0d7f404c Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 16:08:26 -0400 Subject: [PATCH 25/75] Fixed some OpenACC directives (#859) --- src/common/m_finite_differences.fpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 18a0a84484..80db1d8c79 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,8 +18,7 @@ contains integer :: x, y, z !< Generic loop iterators real(wp) :: divergence - - $:PARALLEL_LOOP(collapse=3, private=["divergence"]) + $:PARALLEL_LOOP(collapse=3, gang vector default(present) private=["divergence"]) do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end From 6a348458cc1b585f75dda118425906d3f418ed13 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 6 Jun 2025 17:48:36 -0400 Subject: [PATCH 26/75] cmake flags (#862) --- CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a5835bd24e..8fa0731f75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,13 +135,18 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") if (CMAKE_BUILD_TYPE STREQUAL "Debug") add_compile_options( -Wall + -Wextra + -Wintrinsic-shadow + -Wunderflow + -Wrealloc-lhs + -Wsurprising -fcheck=all,no-array-temps -fbacktrace -fimplicit-none - #-ffpe-trap=invalid,zero,denormal,overflow -fsignaling-nans -finit-real=snan -finit-integer=-99999999 + #-ffpe-trap=invalid,zero,denormal,overflow ) endif() From 217817fcbe5c7b9a44112fd9bc1ece6f0227db16 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 6 Jun 2025 20:42:17 -0400 Subject: [PATCH 27/75] Benchmarking news a new temp directory and there ya go --- .github/workflows/phoenix/bench.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 0c2ed163eb..8812e00e3b 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -8,8 +8,13 @@ if [ "$job_device" == "gpu" ]; then device_opts="--gpu -g $gpu_ids" fi +mkdir -p /storage/scratch1/6/sbryngelson3/mytmp_build +export TMPDIR=/storage/scratch1/6/sbryngelson3/mytmp_build + if ["$job_device" == "gpu"]; then ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks fi + +unset TMPDIR From b443875fcb88d70b0731fa24dc6671009bacd05e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sat, 7 Jun 2025 00:28:57 -0400 Subject: [PATCH 28/75] refac qbmm (#861) --- CMakeLists.txt | 9 ++++----- src/simulation/m_qbmm.fpp | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fa0731f75..d486d43981 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,17 +136,16 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") add_compile_options( -Wall -Wextra - -Wintrinsic-shadow - -Wunderflow - -Wrealloc-lhs - -Wsurprising -fcheck=all,no-array-temps -fbacktrace -fimplicit-none -fsignaling-nans -finit-real=snan -finit-integer=-99999999 - #-ffpe-trap=invalid,zero,denormal,overflow + -Wintrinsic-shadow + -Wunderflow + -Wrealloc-lhs + -Wsurprising ) endif() diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index c5338bfed0..3136146b4d 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -563,8 +563,7 @@ contains end subroutine s_compute_qbmm_rhs -!Coefficient array for non-polytropic model (pb and mv values are accounted in wght_pb and wght_mv) - + !Coefficient array for non-polytropic model (pb and mv values are accounted in wght_pb and wght_mv) pure subroutine s_coeff_nonpoly(pres, rho, c, coeffs) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff_nonpoly From ffb479a25c1e970a24d68f12a270de75274ded46 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 16:10:42 -0400 Subject: [PATCH 29/75] Cody tidying - Remove unused dummy variables (#854) Co-authored-by: Xuzheng Tian Co-authored-by: Spencer Bryngelson Co-authored-by: Spencer Bryngelson --- src/common/m_boundary_common.fpp | 5 +- src/common/m_mpi_common.fpp | 7 +-- src/common/m_phase_change.fpp | 9 ++-- src/common/m_variables_conversion.fpp | 40 +++++--------- src/pre_process/m_boundary_conditions.fpp | 15 +++--- src/pre_process/m_model.fpp | 16 ++---- src/pre_process/m_patches.fpp | 17 +++--- src/pre_process/m_start_up.fpp | 5 +- src/pre_process/p_main.f90 | 2 +- src/simulation/m_bubbles.fpp | 2 +- src/simulation/m_bubbles_EE.fpp | 3 +- src/simulation/m_bubbles_EL.fpp | 13 ++--- src/simulation/m_cbc.fpp | 21 +++----- src/simulation/m_compute_cbc.fpp | 17 ++---- src/simulation/m_data_output.fpp | 8 +-- src/simulation/m_hyperelastic.fpp | 2 +- src/simulation/m_ibm.fpp | 8 +-- src/simulation/m_qbmm.fpp | 6 +-- src/simulation/m_rhs.fpp | 19 +++---- src/simulation/m_riemann_solvers.fpp | 66 +++++++---------------- src/simulation/m_sim_helpers.fpp | 6 +-- src/simulation/m_start_up.fpp | 15 ++---- src/simulation/m_surface_tension.fpp | 9 ++-- src/simulation/m_time_steppers.fpp | 18 +++---- src/simulation/m_viscous.fpp | 12 ++--- src/simulation/m_weno.fpp | 8 ++- src/simulation/p_main.fpp | 11 ++-- 27 files changed, 124 insertions(+), 236 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index f793c94e4d..d9f6094600 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -143,7 +143,7 @@ contains case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1, k, l) case (BC_AXIS) - call s_axis(q_prim_vf, pb, mv, 2, -1, k, l) + call s_axis(q_prim_vf, pb, mv, k, l) case (BC_REFLECTIVE) call s_symmetry(q_prim_vf, pb, mv, 2, -1, k, l) case (BC_PERIODIC) @@ -714,7 +714,7 @@ contains end subroutine s_periodic - pure subroutine s_axis(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + pure subroutine s_axis(q_prim_vf, pb, mv, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_axis #else @@ -722,7 +722,6 @@ contains #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 2d1b079467..1fdc19aa0d 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -376,21 +376,17 @@ contains !! @param Rc_min_glb Global minimum Rc stability criterion impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & - ccfl_max_loc, & Rc_min_loc, & icfl_max_glb, & vcfl_max_glb, & - ccfl_max_glb, & Rc_min_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc - real(wp), intent(in) :: ccfl_max_loc real(wp), intent(in) :: Rc_min_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb - real(wp), intent(out) :: ccfl_max_glb real(wp), intent(out) :: Rc_min_glb #ifdef MFC_SIMULATION @@ -1179,7 +1175,8 @@ contains #:endif end if #:endfor - + p_send => buff_send(0) + p_recv => buff_recv(0) ! Send/Recv #ifdef MFC_SIMULATION #:for rdma_mpi in [False, True] diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 7725743e78..5bbb8c4e8c 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -147,7 +147,7 @@ contains ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 - call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, rM, q_cons_vf, rhoe, TS) + call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) ! check if pTg-equilibrium is required ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities @@ -168,7 +168,7 @@ contains q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! calling pT-equilibrium for overheated vapor, which is MFL = 0 - call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, rM, q_cons_vf, rhoe, TSOV) + call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) ! calculating Saturation temperature call s_TSat(pSOV, TSatOV, TSOV) @@ -181,7 +181,7 @@ contains q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 - call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, rM, q_cons_vf, rhoe, TSSL) + call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) ! calculating Saturation temperature call s_TSat(pSSL, TSatSL, TSSL) @@ -285,7 +285,7 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param rhoe mixture energy !! @param TS equilibrium temperature at the interface - pure subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, rM, q_cons_vf, rhoe, TS) + pure subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k @@ -297,7 +297,6 @@ contains integer, intent(in) :: j, k, l, MFL real(wp), intent(out) :: pS real(wp), dimension(num_fluids), intent(out) :: p_infpT - real(wp), intent(in) :: rM type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf real(wp), intent(in) :: rhoe real(wp), intent(out) :: TS diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 796dcb5003..a23b5ff2ae 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -92,11 +92,11 @@ contains if (model_eqns == 1) then ! Gamma/pi_inf model call s_convert_mixture_to_mixture_variables(q_vf, i, j, k, & - rho, gamma, pi_inf, qv, Re_K, G_K, G) + rho, gamma, pi_inf, qv) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles(q_vf, i, j, k, & - rho, gamma, pi_inf, qv, Re_K, G_K, G) + rho, gamma, pi_inf, qv, Re_K) else ! Volume fraction model call s_convert_species_to_mixture_variables(q_vf, i, j, k, & @@ -207,7 +207,7 @@ contains !! @param pi_inf liquid stiffness !! @param qv fluid reference energy subroutine s_convert_mixture_to_mixture_variables(q_vf, i, j, k, & - rho, gamma, pi_inf, qv, Re_K, G_K, G) + rho, gamma, pi_inf, qv) type(scalar_field), dimension(sys_size), intent(in) :: q_vf integer, intent(in) :: i, j, k @@ -217,11 +217,6 @@ contains real(wp), intent(out), target :: pi_inf real(wp), intent(out), target :: qv - real(wp), optional, dimension(2), intent(out) :: Re_K - - real(wp), optional, intent(out) :: G_K - real(wp), optional, dimension(num_fluids), intent(in) :: G - ! Transferring the density, the specific heat ratio function and the ! liquid stiffness function, respectively rho = q_vf(1)%sf(i, j, k) @@ -254,7 +249,7 @@ contains !! @param pi_inf liquid stiffness !! @param qv fluid reference energy subroutine s_convert_species_to_mixture_variables_bubbles(q_vf, j, k, l, & - rho, gamma, pi_inf, qv, Re_K, G_K, G) + rho, gamma, pi_inf, qv, Re_K) type(scalar_field), dimension(sys_size), intent(in) :: q_vf @@ -266,8 +261,6 @@ contains real(wp), intent(out), target :: qv real(wp), optional, dimension(2), intent(out) :: Re_K - real(wp), optional, intent(out) :: G_K - real(wp), optional, dimension(num_fluids), intent(in) :: G integer :: i, q real(wp), dimension(num_fluids) :: alpha_rho_K, alpha_K @@ -464,7 +457,7 @@ contains pure subroutine s_convert_species_to_mixture_variables_acc(rho_K, & gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, k, l, r, & + alpha_K, alpha_rho_K, Re_K, & G_K, G) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc @@ -481,8 +474,6 @@ contains real(wp), optional, intent(out) :: G_K real(wp), optional, dimension(num_fluids), intent(in) :: G - integer, intent(in) :: k, l, r - integer :: i, j !< Generic loop iterators real(wp) :: alpha_K_sum @@ -548,7 +539,7 @@ contains pure subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, & gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, k, l, r) + alpha_K, alpha_rho_K, Re_K) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc #else @@ -561,7 +552,6 @@ contains !! Partial densities and volume fractions real(wp), dimension(2), intent(out) :: Re_K - integer, intent(in) :: k, l, r integer :: i, j !< Generic loop iterators @@ -820,16 +810,12 @@ contains subroutine s_convert_conservative_to_primitive_variables(qK_cons_vf, & q_T_sf, & qK_prim_vf, & - ibounds, & - gm_alphaK_vf) + ibounds) type(scalar_field), dimension(sys_size), intent(in) :: qK_cons_vf type(scalar_field), intent(inout) :: q_T_sf type(scalar_field), dimension(sys_size), intent(inout) :: qK_prim_vf type(int_bounds_info), dimension(1:3), intent(in) :: ibounds - type(scalar_field), & - allocatable, optional, dimension(:), & - intent(in) :: gm_alphaK_vf real(wp), dimension(num_fluids) :: alpha_K, alpha_rho_K real(wp), dimension(2) :: Re_K @@ -903,13 +889,13 @@ contains ! If in simulation, use acc mixture subroutines if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, j, k, l, G_K, Gs) + alpha_rho_K, Re_K, G_K, Gs) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, j, k, l) + alpha_K, alpha_rho_K, Re_K) else call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, j, k, l) + alpha_K, alpha_rho_K, Re_K) end if #else ! If pre-processing, use non acc mixture subroutines @@ -1511,13 +1497,13 @@ contains if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K, & - j, k, l, G_K, Gs) + G_K, Gs) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K, j, k, l) + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) else call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, j, k, l) + alpha_K, alpha_rho_K, Re_K) end if ! Computing the energy from the pressure diff --git a/src/pre_process/m_boundary_conditions.fpp b/src/pre_process/m_boundary_conditions.fpp index 1e8f160b38..4fc2e5ff03 100644 --- a/src/pre_process/m_boundary_conditions.fpp +++ b/src/pre_process/m_boundary_conditions.fpp @@ -29,9 +29,8 @@ module m_boundary_conditions s_write_parallel_boundary_condition_files contains - impure subroutine s_line_segment_bc(patch_id, q_prim_vf, bc_type) + impure subroutine s_line_segment_bc(patch_id, bc_type) - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type integer, intent(in) :: patch_id @@ -79,9 +78,8 @@ contains end subroutine s_line_segment_bc - impure subroutine s_circle_bc(patch_id, q_prim_vf, bc_type) + impure subroutine s_circle_bc(patch_id, bc_type) - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type integer, intent(in) :: patch_id @@ -143,9 +141,8 @@ contains end subroutine s_circle_bc - impure subroutine s_rectangle_bc(patch_id, q_prim_vf, bc_type) + impure subroutine s_rectangle_bc(patch_id, bc_type) - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type integer, intent(in) :: patch_id @@ -247,9 +244,9 @@ contains end if if (patch_bc(i)%geometry == 2) then - call s_circle_bc(i, q_prim_vf, bc_type) + call s_circle_bc(i, bc_type) elseif (patch_bc(i)%geometry == 3) then - call s_rectangle_bc(i, q_prim_vf, bc_type) + call s_rectangle_bc(i, bc_type) end if end do !< Apply 1D patches to 2D domain @@ -260,7 +257,7 @@ contains end if if (patch_bc(i)%geometry == 1) then - call s_line_segment_bc(i, q_prim_vf, bc_type) + call s_line_segment_bc(i, bc_type) end if end do end if diff --git a/src/pre_process/m_model.fpp b/src/pre_process/m_model.fpp index 214a5fcda1..852cc1a22b 100644 --- a/src/pre_process/m_model.fpp +++ b/src/pre_process/m_model.fpp @@ -1104,11 +1104,10 @@ contains !! @param point The cell centers of the current levelset cell !! @param spacing Dimensions of the current levelset cell !! @return Distance which the levelset distance without interpolation - pure function f_distance(boundary_v, boundary_vertex_count, boundary_edge_count, point, spacing) result(distance) - integer, intent(in) :: boundary_vertex_count, boundary_edge_count + pure function f_distance(boundary_v, boundary_edge_count, point) result(distance) + integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v t_vec3, intent(in) :: point - t_vec3, intent(in) :: spacing integer :: i real(wp) :: dist_buffer1, dist_buffer2 @@ -1132,16 +1131,13 @@ contains !> This procedure determines the levelset normals of 2D models without interpolation. !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation - !! @param boundary_vertex_count Output the total number of boundary vertices !! @param boundary_edge_count Output the total number of boundary edges !! @param point The cell centers of the current levelset cell - !! @param spacing Dimensions of the current levelset cell !! @param normals Output levelset normals without interpolation - pure subroutine f_normals(boundary_v, boundary_vertex_count, boundary_edge_count, point, spacing, normals) - integer, intent(in) :: boundary_vertex_count, boundary_edge_count + pure subroutine f_normals(boundary_v, boundary_edge_count, point, normals) + integer, intent(in) :: boundary_edge_count real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v t_vec3, intent(in) :: point - t_vec3, intent(in) :: spacing t_vec3, intent(out) :: normals integer :: i, idx_buffer @@ -1176,13 +1172,11 @@ contains !! @param interpolated_boundary_v Group of all the boundary vertices of the interpolated 2D model !! @param total_vertices Total number of vertices after interpolation !! @param point The cell centers of the current levelset cell - !! @param spacing Dimensions of the current levelset cell !! @return Distance which the levelset distance without interpolation - pure function f_interpolated_distance(interpolated_boundary_v, total_vertices, point, spacing) result(distance) + pure function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) integer, intent(in) :: total_vertices real(wp), intent(in), dimension(1:total_vertices, 1:3) :: interpolated_boundary_v t_vec3, intent(in) :: point - t_vec3, intent(in) :: spacing integer :: i !< Loop iterator real(wp) :: dist_buffer, min_dist diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index 584048aa02..fe861c4c06 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2300,8 +2300,7 @@ contains if (interpolate) then STL_levelset%sf(i, j, k, patch_id) = f_interpolated_distance(interpolated_boundary_v, & total_vertices, & - point, & - (/dx, dy, dz/)) + point) else STL_levelset%sf(i, j, k, patch_id) = distance end if @@ -2324,15 +2323,12 @@ contains ! Get the shortest distance between the cell center and the model boundary STL_levelset%sf(i, j, 0, patch_id) = f_interpolated_distance(interpolated_boundary_v, & total_vertices, & - point, & - (/dx, dy, dz/)) + point) else ! Get the shortest distance between the cell center and the interpolated model boundary STL_levelset%sf(i, j, 0, patch_id) = f_distance(boundary_v, & - boundary_vertex_count, & boundary_edge_count, & - point, & - (/dx, dy, dz/)) + point) end if ! Correct the sign of the levelset @@ -2342,10 +2338,9 @@ contains ! Get the boundary normals call f_normals(boundary_v, & - boundary_vertex_count, & - boundary_edge_count, & - point, & - & (/dx, dy, dz/), normals) + boundary_edge_count, & + point, & + normals) ! Correct the sign of the levelset_norm if (patch_id_fp(i, j, k) == 0) then diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 1c9c79d342..e33491576f 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -826,12 +826,9 @@ contains end subroutine s_read_grid - impure subroutine s_apply_initial_condition(start, finish, proc_time, time_avg, time_final, file_exists) + impure subroutine s_apply_initial_condition(start, finish) real(wp), intent(inout) :: start, finish - real(wp), dimension(:), intent(inout) :: proc_time - real(wp), intent(inout) :: time_avg, time_final - logical, intent(inout) :: file_exists ! Setting up the grid and the initial condition. If the grid is read in from ! preexisting grid data files, it is checked for consistency. If the grid is diff --git a/src/pre_process/p_main.f90 b/src/pre_process/p_main.f90 index 56e6ab928f..4ce377f5cb 100644 --- a/src/pre_process/p_main.f90 +++ b/src/pre_process/p_main.f90 @@ -28,7 +28,7 @@ program p_main allocate (proc_time(0:num_procs - 1)) - call s_apply_initial_condition(start, finish, proc_time, time_avg, time_final, file_exists) + call s_apply_initial_condition(start, finish) time_avg = abs(finish - start) diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 11a7c075d2..54fc7fcc47 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -70,7 +70,7 @@ contains else if (bubble_model == 3) then ! Rayleigh-Plesset bubbles fCpbw = f_cpbw_KM(fR0, fR, fV, fpb) - f_rddot = f_rddot_RP(fP, fRho, fR, fV, fR0, fCpbw) + f_rddot = f_rddot_RP(fP, fRho, fR, fV, fCpbw) end if end function f_rddot diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 714f95913c..d6c2b0212c 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -154,10 +154,9 @@ contains !! that are needed for the bubble modeling !! @param q_prim_vf Primitive variables !! @param q_cons_vf Conservative variables - impure subroutine s_compute_bubble_EE_source(q_cons_vf, q_prim_vf, t_step, rhs_vf) + impure subroutine s_compute_bubble_EE_source(q_cons_vf, q_prim_vf, rhs_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - integer, intent(in) :: t_step type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf real(wp) :: rddot diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index a1746ddac6..c455426180 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -503,12 +503,10 @@ contains !! @param rhs_vf Calculated change of conservative variables !! @param t_step Current time step !! @param stage Current stage in the time-stepper algorithm - subroutine s_compute_bubble_EL_dynamics(q_cons_vf, q_prim_vf, t_step, rhs_vf, stage) + subroutine s_compute_bubble_EL_dynamics(q_prim_vf, stage) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf - integer, intent(in) :: t_step, stage + integer, intent(in) :: stage real(wp) :: myVapFlux real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac @@ -582,8 +580,8 @@ contains myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) end do call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re, cell(1), cell(2), cell(3)) - call s_compute_cson_from_pinf(k, q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + myalpha_rho, Re) + call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) ! Adaptive time stepping adap_dt_stop = 0 @@ -740,13 +738,12 @@ contains !! @param gamma Liquid specific heat ratio !! @param pi_inf Liquid stiffness !! @param cson Calculated speed of sound - pure subroutine s_compute_cson_from_pinf(bub_id, q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) + pure subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_cson_from_pinf #else $:ROUTINE() #endif - integer, intent(in) :: bub_id type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(in) :: pinf, rhol, gamma, pi_inf integer, dimension(3), intent(in) :: cell diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index e306717513..6b19251034 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -808,9 +808,9 @@ contains end do if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc, 0, k, r) + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc) else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc, 0, k, r) + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc) end if $:LOOP() @@ -920,13 +920,13 @@ contains if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then - call s_compute_slip_wall_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then - call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) ! Add GRCBC for Subsonic Inflow if (bc_${XYZ}$%grcbc_in) then $:LOOP() @@ -965,7 +965,7 @@ contains call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then - call s_compute_supersonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + call s_compute_supersonic_inflow_L(L) else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) @@ -1118,8 +1118,7 @@ contains ! The reshaping of outputted data and disssociation of the FD and PI ! coefficients, or CBC coefficients, respectively, based on selected ! CBC coordinate direction. - call s_finalize_cbc(flux_vf, flux_src_vf, & - ix, iy, iz) + call s_finalize_cbc(flux_vf, flux_src_vf) end subroutine s_cbc !> The computation of parameters, the allocation of memory, @@ -1406,18 +1405,12 @@ contains !! are necessary in order to finalize the CBC application !! @param flux_vf Cell-boundary-average fluxes !! @param flux_src_vf Cell-boundary-average flux sources - !! @param ix Index bound in the first coordinate direction - !! @param iy Index bound in the second coordinate direction - !! @param iz Index bound in the third coordinate direction - subroutine s_finalize_cbc(flux_vf, flux_src_vf, & - ix, iy, iz) + subroutine s_finalize_cbc(flux_vf, flux_src_vf) type(scalar_field), & dimension(sys_size), & intent(inout) :: flux_vf, flux_src_vf - type(int_bounds_info), intent(in) :: ix, iy, iz - integer :: i, j, k, r !< Generic loop iterators ! Determining the indicial shift based on CBC location diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 23136a0198..421f4ab07a 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -24,7 +24,7 @@ contains !! Thompson (1990). At the slip wall (frictionless wall), !! the normal component of velocity is zero at all times, !! while the transverse velocities may be nonzero. - pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L #else @@ -33,10 +33,8 @@ contains real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds integer :: i @@ -104,7 +102,7 @@ contains !! see pg. 455, Thompson (1990). This nonreflecting subsonic !! CBC assumes an incoming flow and reduces the amplitude of !! any reflections caused by outgoing waves. - pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L #else @@ -113,10 +111,8 @@ contains real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds integer :: i @@ -264,19 +260,14 @@ contains !! state, or nearly a steady state, CBC in which only the !! transverse terms may generate a time dependence at the !! inflow boundary. - pure subroutine s_compute_supersonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + pure subroutine s_compute_supersonic_inflow_L(L) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L #else $:ROUTINE() #endif - real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds + integer :: i do i = 1, advxe diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index e8f4c92f06..fdf81d528a 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -99,7 +99,7 @@ contains if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) else - call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta) + call s_write_parallel_data_files(q_cons_vf, t_step, beta) end if end subroutine s_write_data_files @@ -332,11 +332,9 @@ contains if (num_procs > 1) then call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & - ccfl_max_loc, & Rc_min_loc, & icfl_max_glb, & vcfl_max_glb, & - ccfl_max_glb, & Rc_min_glb) else icfl_max_glb = icfl_max_loc @@ -785,13 +783,11 @@ contains !> The goal of this subroutine is to output the grid and !! conservative variables data files for given time-step. !! @param q_cons_vf Cell-average conservative variables - !! @param q_prim_vf Cell-average primitive variables !! @param t_step Current time-step !! @param beta Eulerian void fraction from lagrangian bubbles - impure subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta) + impure subroutine s_write_parallel_data_files(q_cons_vf, t_step, beta) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index efe984ef05..994412a567 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -119,7 +119,7 @@ contains end do ! If in simulation, use acc mixture subroutines call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, j, k, l, G, Gs) + alpha_rho_k, Re, G, Gs) rho = max(rho, sgm_eps) G = max(G, sgm_eps) !if ( G <= verysmall ) G_K = 0_wp diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index f4b83f9598..1db453b8a0 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -208,13 +208,13 @@ contains ! If in simulation, use acc mixture subroutines if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, j, k, l, G_K, Gs) + alpha_rho_IP, Re_K, G_K, Gs) else if (bubbles_euler) then call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, j, k, l) + alpha_rho_IP, Re_K) else call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, j, k, l) + alpha_rho_IP, Re_K) end if end if @@ -331,7 +331,7 @@ contains end if call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, j, k, l) + alpha_rho_IP, Re_K) dyn_pres = 0._wp diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 3136146b4d..72a0114865 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -412,14 +412,13 @@ contains end subroutine s_initialize_qbmm_module - pure subroutine s_compute_qbmm_rhs(idir, q_cons_vf, q_prim_vf, rhs_vf, flux_n_vf, pb, rhs_pb, mv, rhs_mv) + pure subroutine s_compute_qbmm_rhs(idir, q_cons_vf, q_prim_vf, rhs_vf, flux_n_vf, pb, rhs_pb) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf, q_prim_vf type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(scalar_field), dimension(sys_size), intent(in) :: flux_n_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, rhs_pb - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv integer :: i, j, k, l, q real(wp) :: nb_q, nb_dot, R, R2, nR, nR2, nR_dot, nR2_dot, var, AX @@ -700,7 +699,7 @@ contains end subroutine s_coeff - subroutine s_mom_inv(q_cons_vf, q_prim_vf, momsp, moms3d, pb, rhs_pb, mv, rhs_mv, ix, iy, iz, nbub_sc) + subroutine s_mom_inv(q_cons_vf, q_prim_vf, momsp, moms3d, pb, rhs_pb, mv, rhs_mv, ix, iy, iz) type(scalar_field), dimension(:), intent(inout) :: q_cons_vf, q_prim_vf type(scalar_field), dimension(:), intent(inout) :: momsp @@ -708,7 +707,6 @@ contains real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, rhs_pb real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv type(int_bounds_info), intent(in) :: ix, iy, iz - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:), intent(inout) :: nbub_sc real(wp), dimension(nmom) :: moms, msum real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index c31095f6ca..846f04370a 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -670,8 +670,7 @@ contains q_cons_qp%vf, & q_T_sf, & q_prim_qp%vf, & - idwint, & - gm_alpha_qp%vf) + idwint) call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") @@ -688,7 +687,7 @@ contains if (t_step == t_step_stop) return end if - if (qbmm) call s_mom_inv(q_cons_qp%vf, q_prim_qp%vf, mom_sp, mom_3d, pb, rhs_pb, mv, rhs_mv, idwbuff(1), idwbuff(2), idwbuff(3), nbub) + if (qbmm) call s_mom_inv(q_cons_qp%vf, q_prim_qp%vf, mom_sp, mom_3d, pb, rhs_pb, mv, rhs_mv, idwbuff(1), idwbuff(2), idwbuff(3)) if (viscous) then call nvtxStartRange("RHS-VISCOUS") @@ -853,9 +852,7 @@ contains rhs_vf, & flux_n(id)%vf, & pb, & - rhs_pb, & - mv, & - rhs_mv) + rhs_pb) call nvtxEndRange end if ! END: Additional physics and source terms @@ -899,7 +896,6 @@ contains call s_compute_bubble_EE_source( & q_cons_qp%vf(1:sys_size), & q_prim_qp%vf(1:sys_size), & - t_step, & rhs_vf) call nvtxEndRange end if @@ -916,10 +912,7 @@ contains if (.not. adap_dt) then call nvtxStartRange("RHS-EL-BUBBLES-DYN") call s_compute_bubble_EL_dynamics( & - q_cons_qp%vf(1:sys_size), & q_prim_qp%vf(1:sys_size), & - t_step, & - rhs_vf, & stage) call nvtxEndRange end if @@ -1957,19 +1950,19 @@ contains call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, iv%beg:iv%end), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, iv%beg:iv%end), & - norm_dir, weno_dir, & + weno_dir, & is1, is2, is3) else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1, is2, is3) end if else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, :), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, :), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1, is2, is3) end if diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 1f52925f00..6d0c7fb7d8 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -242,17 +242,14 @@ contains norm_dir, & ix, iy, iz) else - call s_compute_cartesian_viscous_source_flux(velL_vf, & - dvelL_dx_vf, & + call s_compute_cartesian_viscous_source_flux(dvelL_dx_vf, & dvelL_dy_vf, & dvelL_dz_vf, & - velR_vf, & dvelR_dx_vf, & dvelR_dy_vf, & dvelR_dz_vf, & flux_src_vf, & - norm_dir, & - ix, iy, iz) + norm_dir) end if end subroutine s_compute_viscous_source_flux @@ -346,19 +343,15 @@ contains qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & dqL_prim_dy_vf, & dqL_prim_dz_vf, & - qL_prim_vf, & qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, & dqR_prim_dy_vf, & dqR_prim_dz_vf, & - qR_prim_vf, & norm_dir, ix, iy, iz) ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver( & - q_prim_vf, & - flux_vf, flux_src_vf, & - flux_gsrc_vf, & - norm_dir, ix, iy, iz) + flux_src_vf, & + norm_dir) #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then @@ -1138,7 +1131,7 @@ contains call s_finalize_riemann_solver(flux_vf, flux_src_vf, & flux_gsrc_vf, & - norm_dir, ix, iy, iz) + norm_dir) end subroutine s_hll_riemann_solver @@ -1271,20 +1264,16 @@ contains qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & dqL_prim_dy_vf, & dqL_prim_dz_vf, & - qL_prim_vf, & qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, & dqR_prim_dy_vf, & dqR_prim_dz_vf, & - qR_prim_vf, & norm_dir, ix, iy, iz) ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver( & - q_prim_vf, & - flux_vf, flux_src_vf, & - flux_gsrc_vf, & - norm_dir, ix, iy, iz) + flux_src_vf, & + norm_dir) idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 @@ -2998,7 +2987,6 @@ contains if (surface_tension) then call s_compute_capilary_source_flux( & - q_prim_vf, & vel_src_rsx_vf, & vel_src_rsy_vf, & vel_src_rsz_vf, & @@ -3008,7 +2996,7 @@ contains call s_finalize_riemann_solver(flux_vf, flux_src_vf, & flux_gsrc_vf, & - norm_dir, ix, iy, iz) + norm_dir) end subroutine s_hllc_riemann_solver @@ -3068,13 +3056,13 @@ contains call s_populate_riemann_states_variables_buffers( & qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - dqL_prim_dy_vf, dqL_prim_dz_vf, qL_prim_vf, & + dqL_prim_dy_vf, dqL_prim_dz_vf, & qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, & - dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, & + dqR_prim_dy_vf, dqR_prim_dz_vf, & norm_dir, ix, iy, iz) call s_initialize_riemann_solver( & - q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + flux_src_vf, norm_dir) #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then @@ -3317,7 +3305,7 @@ contains #:endfor call s_finalize_riemann_solver(flux_vf, flux_src_vf, flux_gsrc_vf, & - norm_dir, ix, iy, iz) + norm_dir) end subroutine s_hlld_riemann_solver !> The computation of parameters, the allocation of memory, @@ -3464,11 +3452,9 @@ contains qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & dqL_prim_dy_vf, & dqL_prim_dz_vf, & - qL_prim_vf, & qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, & dqR_prim_dy_vf, & dqR_prim_dz_vf, & - qR_prim_vf, & norm_dir, ix, iy, iz) real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf @@ -3477,8 +3463,7 @@ contains allocatable, dimension(:), & intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, & dqL_prim_dy_vf, dqR_prim_dy_vf, & - dqL_prim_dz_vf, dqR_prim_dz_vf, & - qL_prim_vf, qR_prim_vf + dqL_prim_dz_vf, dqR_prim_dz_vf integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz @@ -3845,18 +3830,14 @@ contains !! @param iz Index bounds in the z-dir !! @param q_prim_vf Cell-averaged primitive variables subroutine s_initialize_riemann_solver( & - q_prim_vf, & - flux_vf, flux_src_vf, & - flux_gsrc_vf, & - norm_dir, ix, iy, iz) + flux_src_vf, & + norm_dir) - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), & dimension(sys_size), & - intent(inout) :: flux_vf, flux_src_vf, flux_gsrc_vf + intent(inout) :: flux_src_vf integer, intent(in) :: norm_dir - type(int_bounds_info), intent(in) :: ix, iy, iz integer :: i, j, k, l ! Generic loop iterators @@ -4133,26 +4114,21 @@ contains !! @param[in] ix X-direction loop bounds (int_bounds_info). !! @param[in] iy Y-direction loop bounds (int_bounds_info). !! @param[in] iz Z-direction loop bounds (int_bounds_info). - pure subroutine s_compute_cartesian_viscous_source_flux(velL_vf, & - dvelL_dx_vf, & + pure subroutine s_compute_cartesian_viscous_source_flux(dvelL_dx_vf, & dvelL_dy_vf, & dvelL_dz_vf, & - velR_vf, & dvelR_dx_vf, & dvelR_dy_vf, & dvelR_dz_vf, & flux_src_vf, & - norm_dir, & - ix, iy, iz) + norm_dir) ! Arguments - type(scalar_field), dimension(num_dims), intent(in) :: velL_vf, velR_vf type(scalar_field), dimension(num_dims), intent(in) :: dvelL_dx_vf, dvelR_dx_vf type(scalar_field), dimension(num_dims), intent(in) :: dvelL_dy_vf, dvelR_dy_vf type(scalar_field), dimension(num_dims), intent(in) :: dvelL_dz_vf, dvelR_dz_vf type(scalar_field), dimension(sys_size), intent(inout) :: flux_src_vf integer, intent(in) :: norm_dir - type(int_bounds_info), intent(in) :: ix, iy, iz ! Local variables real(wp), dimension(num_dims, num_dims) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. @@ -4326,19 +4302,15 @@ contains !! @param flux_src_vf Intercell source fluxes !! @param flux_gsrc_vf Intercell geometric source fluxes !! @param norm_dir Dimensional splitting coordinate direction - !! @param ix Index bounds in first coordinate direction - !! @param iy Index bounds in second coordinate direction - !! @param iz Index bounds in third coordinate direction pure subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, & flux_gsrc_vf, & - norm_dir, ix, iy, iz) + norm_dir) type(scalar_field), & dimension(sys_size), & intent(inout) :: flux_vf, flux_src_vf, flux_gsrc_vf integer, intent(in) :: norm_dir - type(int_bounds_info), intent(in) :: ix, iy, iz integer :: i, j, k, l !< Generic loop iterators diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 4f51c59898..5dc7ee30a7 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -57,11 +57,11 @@ contains if (elasticity) then call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, & - alpha_rho, Re, j, k, l, G, Gs) + alpha_rho, Re, G, Gs) elseif (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re, j, k, l) + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re) else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re, j, k, l) + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re) end if $:LOOP() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 208f0524e6..bd4ea50666 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1240,15 +1240,9 @@ contains end subroutine s_initialize_internal_energy_equations - impure subroutine s_perform_time_step(t_step, time_avg, time_final, io_time_avg, io_time_final, proc_time, io_proc_time, file_exists, start, finish, nt) + impure subroutine s_perform_time_step(t_step, time_avg) integer, intent(inout) :: t_step - real(wp), intent(inout) :: time_avg, time_final - real(wp), intent(inout) :: io_time_avg, io_time_final - real(wp), dimension(:), intent(inout) :: proc_time - real(wp), dimension(:), intent(inout) :: io_proc_time - logical, intent(inout) :: file_exists - real(wp), intent(inout) :: start, finish - integer, intent(inout) :: nt + real(wp), intent(inout) :: time_avg integer :: i @@ -1330,16 +1324,13 @@ contains end subroutine s_perform_time_step - impure subroutine s_save_performance_metrics(t_step, time_avg, time_final, io_time_avg, io_time_final, proc_time, io_proc_time, file_exists, start, finish, nt) + impure subroutine s_save_performance_metrics(time_avg, time_final, io_time_avg, io_time_final, proc_time, io_proc_time, file_exists) - integer, intent(inout) :: t_step real(wp), intent(inout) :: time_avg, time_final real(wp), intent(inout) :: io_time_avg, io_time_final real(wp), dimension(:), intent(inout) :: proc_time real(wp), dimension(:), intent(inout) :: io_proc_time logical, intent(inout) :: file_exists - real(wp), intent(inout) :: start, finish - integer, intent(inout) :: nt real(wp) :: grind_time diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 97c987327d..6631933e2b 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -66,12 +66,11 @@ contains end if end subroutine s_initialize_surface_tension_module - pure subroutine s_compute_capilary_source_flux(q_prim_vf, & - vSrc_rsx_vf, vSrc_rsy_vf, vSrc_rsz_vf, & - flux_src_vf, & - id, isx, isy, isz) + pure subroutine s_compute_capilary_source_flux( & + vSrc_rsx_vf, vSrc_rsy_vf, vSrc_rsz_vf, & + flux_src_vf, & + id, isx, isy, isz) - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), dimension(-1:, 0:, 0:, 1:), intent(in) :: vSrc_rsx_vf real(wp), dimension(-1:, 0:, 0:, 1:), intent(in) :: vSrc_rsy_vf real(wp), dimension(-1:, 0:, 0:, 1:), intent(in) :: vSrc_rsz_vf diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 15fd95e52c..042b878c04 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -903,13 +903,13 @@ contains call nvtxStartRange("TIMESTEP") ! Stage 1 of 3 - call s_adaptive_dt_bubble(t_step, 1) + call s_adaptive_dt_bubble(1) ! Stage 2 of 3 call s_3rd_order_tvd_rk(t_step, time_avg) ! Stage 3 of 3 - call s_adaptive_dt_bubble(t_step, 3) + call s_adaptive_dt_bubble(3) call nvtxEndRange @@ -921,9 +921,9 @@ contains !> Bubble source part in Strang operator splitting scheme !! @param t_step Current time-step - impure subroutine s_adaptive_dt_bubble(t_step, stage) + impure subroutine s_adaptive_dt_bubble(stage) - integer, intent(in) :: t_step, stage + integer, intent(in) :: stage type(vector_field) :: gm_alpha_qp @@ -931,18 +931,17 @@ contains q_cons_ts(1)%vf, & q_T_sf, & q_prim_vf, & - idwint, & - gm_alpha_qp%vf) + idwint) if (bubbles_euler) then - call s_compute_bubble_EE_source(q_cons_ts(1)%vf, q_prim_vf, t_step, rhs_vf) + call s_compute_bubble_EE_source(q_cons_ts(1)%vf, q_prim_vf, rhs_vf) call s_comp_alpha_from_n(q_cons_ts(1)%vf) elseif (bubbles_lagrange) then call s_populate_variables_buffers(q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf, bc_type) - call s_compute_bubble_EL_dynamics(q_cons_ts(1)%vf, q_prim_vf, t_step, rhs_vf, stage) + call s_compute_bubble_EL_dynamics(q_prim_vf, stage) call s_transfer_data_to_tmp() call s_smear_voidfraction() if (stage == 3) then @@ -979,8 +978,7 @@ contains q_cons_ts(1)%vf, & q_T_sf, & q_prim_vf, & - idwint, & - gm_alpha_qp%vf) + idwint) $:PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 07815a9c46..2cc8797e27 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1006,18 +1006,18 @@ contains if (p > 0) then call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, iv%beg:iv%end), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, iv%beg:iv%end), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) end if else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, :), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, :), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) end if @@ -1104,19 +1104,19 @@ contains call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, iv%beg:iv%end), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, iv%beg:iv%end), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) end if else call s_weno(v_vf(iv%beg:iv%end), & vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, :), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, :), vR_z(:, :, :, :), & - norm_dir, weno_dir, & + weno_dir, & is1_viscous, is2_viscous, is3_viscous) end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 1db77c8b45..17e0583687 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -639,13 +639,12 @@ contains end subroutine s_compute_weno_coefficients subroutine s_weno(v_vf, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & - norm_dir, weno_dir, & + weno_dir, & is1_weno_d, is2_weno_d, is3_weno_d) type(scalar_field), dimension(1:), intent(in) :: v_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z - integer, intent(in) :: norm_dir integer, intent(in) :: weno_dir type(int_bounds_info), intent(in) :: is1_weno_d, is2_weno_d, is3_weno_d @@ -668,7 +667,7 @@ contains if (weno_order /= 1) then call s_initialize_weno(v_vf, & - norm_dir, weno_dir) + weno_dir) end if if (weno_order == 1) then @@ -1120,11 +1119,10 @@ contains !! @param is2_weno Index bounds in second coordinate direction !! @param is3_weno Index bounds in third coordinate direction subroutine s_initialize_weno(v_vf, & - norm_dir, weno_dir) + weno_dir) type(scalar_field), dimension(:), intent(IN) :: v_vf - integer, intent(IN) :: norm_dir integer, intent(IN) :: weno_dir integer :: j, k, l, q diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 57dcbfbdf6..29cb3b8281 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -76,20 +76,19 @@ program p_main if (cfl_dt) then if (mytime >= t_stop) then - call s_save_performance_metrics(t_step, time_avg, time_final, io_time_avg, & - io_time_final, proc_time, io_proc_time, file_exists, start, finish, nt) + call s_save_performance_metrics(time_avg, time_final, io_time_avg, & + io_time_final, proc_time, io_proc_time, file_exists) exit end if else if (t_step == t_step_stop) then - call s_save_performance_metrics(t_step, time_avg, time_final, io_time_avg, & - io_time_final, proc_time, io_proc_time, file_exists, start, finish, nt) + call s_save_performance_metrics(time_avg, time_final, io_time_avg, & + io_time_final, proc_time, io_proc_time, file_exists) exit end if end if - call s_perform_time_step(t_step, time_avg, time_final, io_time_avg, io_time_final, & - proc_time, io_proc_time, file_exists, start, finish, nt) + call s_perform_time_step(t_step, time_avg) if (cfl_dt) then if (abs(mod(mytime, t_save)) < dt .or. mytime >= t_stop) then From 5b8d97905a2fb8900f297341ea1ea67968755571 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 16:14:18 -0400 Subject: [PATCH 30/75] move pressure relaxation to its own module + refac (#865) --- misc/length-subroutines.sh | 29 +++ src/simulation/m_pressure_relaxation.fpp | 315 +++++++++++++++++++++++ src/simulation/m_rhs.fpp | 276 +------------------- src/simulation/m_time_steppers.fpp | 2 + 4 files changed, 351 insertions(+), 271 deletions(-) create mode 100755 misc/length-subroutines.sh create mode 100644 src/simulation/m_pressure_relaxation.fpp diff --git a/misc/length-subroutines.sh b/misc/length-subroutines.sh new file mode 100755 index 0000000000..b955e2d73a --- /dev/null +++ b/misc/length-subroutines.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Use gawk if available, otherwise fall back to awk +if command -v gawk > /dev/null; then + AWK_CMD="gawk" + IGNORECASE_BLOCK='BEGIN { IGNORECASE = 1 }' +else + AWK_CMD="awk" + IGNORECASE_BLOCK='' + echo "Warning: gawk not found. Case-insensitive matching may not work as expected." >&2 +fi + +find . -type f \( -name "*.f90" -o -name "*.fpp" \) | while read file; do + "$AWK_CMD" " + $IGNORECASE_BLOCK + /^[ \t]*((pure|elemental|impure)[ \t]+)*subroutine[ \t]+[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\\(/ { + in_sub = 1 + match(\$0, /subroutine[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)/, arr) + sub_name = arr[1] + start_line = NR + next + } + /^[ \t]*end[ \t]+subroutine[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)?[ \t]*\$/ && in_sub { + end_line = NR + print (end_line - start_line + 1) \"\t\" FILENAME \": \" sub_name + in_sub = 0 + } + " "$file" +done | sort -nr | awk -F'\t' '{print $2 " : " $1 " lines"}' | head -20 diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp new file mode 100644 index 0000000000..8aa78ad88a --- /dev/null +++ b/src/simulation/m_pressure_relaxation.fpp @@ -0,0 +1,315 @@ +!> +!! @file m_pressure_relaxation.fpp +!! @brief Contains module m_pressure_relaxation + +#:include 'case.fpp' +#:include 'macros.fpp' + +!> @brief The module contains the subroutines used to perform pressure relaxation +!! for multi-component flows using the 6-equation model. This includes +!! volume fraction correction, Newton-Raphson pressure equilibration, and +!! internal energy correction to maintain thermodynamic consistency. +module m_pressure_relaxation + + use m_derived_types !< Definitions of the derived types + use m_global_parameters !< Definitions of the global parameters + + implicit none + + private; public :: s_pressure_relaxation_procedure, & + s_initialize_pressure_relaxation_module, & + s_finalize_pressure_relaxation_module + + real(wp), allocatable, dimension(:) :: gamma_min, pres_inf + !$acc declare create(gamma_min, pres_inf) + + real(wp), allocatable, dimension(:, :) :: Res + !$acc declare create(Res) + +contains + + !> Initialize the pressure relaxation module + impure subroutine s_initialize_pressure_relaxation_module + + integer :: i, j + + @:ALLOCATE(gamma_min(1:num_fluids), pres_inf(1:num_fluids)) + + do i = 1, num_fluids + gamma_min(i) = 1._wp/fluid_pp(i)%gamma + 1._wp + pres_inf(i) = fluid_pp(i)%pi_inf/(1._wp + fluid_pp(i)%gamma) + end do + !$acc update device(gamma_min, pres_inf) + + if (viscous) then + @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) + do i = 1, 2 + do j = 1, Re_size(i) + Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + end do + end do + !$acc update device(Res, Re_idx, Re_size) + end if + + end subroutine s_initialize_pressure_relaxation_module + + !> Finalize the pressure relaxation module + impure subroutine s_finalize_pressure_relaxation_module + + @:DEALLOCATE(gamma_min, pres_inf) + if (viscous) then + @:DEALLOCATE(Res) + end if + + end subroutine s_finalize_pressure_relaxation_module + + !> The main pressure relaxation procedure + !! @param q_cons_vf Cell-average conservative variables + pure subroutine s_pressure_relaxation_procedure(q_cons_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer :: j, k, l + + !$acc parallel loop collapse(3) gang vector default(present) + do l = 0, p + do k = 0, n + do j = 0, m + call s_relax_cell_pressure(q_cons_vf, j, k, l) + end do + end do + end do + + end subroutine s_pressure_relaxation_procedure + + !> Process pressure relaxation for a single cell + pure subroutine s_relax_cell_pressure(q_cons_vf, j, k, l) + !$acc routine seq + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer, intent(in) :: j, k, l + + ! Volume fraction correction + if (mpp_lim) call s_correct_volume_fractions(q_cons_vf, j, k, l) + + ! Pressure equilibration + if (s_needs_pressure_relaxation(q_cons_vf, j, k, l)) then + call s_equilibrate_pressure(q_cons_vf, j, k, l) + end if + + ! Internal energy correction + call s_correct_internal_energies(q_cons_vf, j, k, l) + + end subroutine s_relax_cell_pressure + + !> Check if pressure relaxation is needed for this cell + pure logical function s_needs_pressure_relaxation(q_cons_vf, j, k, l) + !$acc routine seq + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + integer, intent(in) :: j, k, l + integer :: i + + s_needs_pressure_relaxation = .true. + !$acc loop seq + do i = 1, num_fluids + if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) then + s_needs_pressure_relaxation = .false. + end if + end do + + end function s_needs_pressure_relaxation + + !> Correct volume fractions to physical bounds + pure subroutine s_correct_volume_fractions(q_cons_vf, j, k, l) + !$acc routine seq + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer, intent(in) :: j, k, l + real(wp) :: sum_alpha + integer :: i + + sum_alpha = 0._wp + !$acc loop seq + do i = 1, num_fluids + if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. & + (q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then + q_cons_vf(i + contxb - 1)%sf(j, k, l) = 0._wp + q_cons_vf(i + advxb - 1)%sf(j, k, l) = 0._wp + q_cons_vf(i + intxb - 1)%sf(j, k, l) = 0._wp + end if + if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > 1._wp) & + q_cons_vf(i + advxb - 1)%sf(j, k, l) = 1._wp + sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l) + end do + + !$acc loop seq + do i = 1, num_fluids + q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha + end do + + end subroutine s_correct_volume_fractions + + !> Main pressure equilibration using Newton-Raphson + pure subroutine s_equilibrate_pressure(q_cons_vf, j, k, l) + !$acc routine seq + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer, intent(in) :: j, k, l + + real(wp) :: pres_relax, f_pres, df_pres + real(wp), dimension(num_fluids) :: pres_K_init, rho_K_s + integer, parameter :: MAX_ITER = 50 + real(wp), parameter :: TOLERANCE = 1e-10_wp + integer :: iter, i + + ! Initialize pressures + pres_relax = 0._wp + !$acc loop seq + do i = 1, num_fluids + if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then + pres_K_init(i) = (q_cons_vf(i + intxb - 1)%sf(j, k, l)/ & + q_cons_vf(i + advxb - 1)%sf(j, k, l) - pi_infs(i))/gammas(i) + if (pres_K_init(i) <= -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp) & + pres_K_init(i) = -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp + else + pres_K_init(i) = 0._wp + end if + pres_relax = pres_relax + q_cons_vf(i + advxb - 1)%sf(j, k, l)*pres_K_init(i) + end do + + ! Newton-Raphson iteration + f_pres = 1e-9_wp + df_pres = 1e9_wp + !$acc loop seq + do iter = 0, MAX_ITER - 1 + if (abs(f_pres) > TOLERANCE) then + pres_relax = pres_relax - f_pres/df_pres + + ! Enforce pressure bounds + do i = 1, num_fluids + if (pres_relax <= -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp) & + pres_relax = -(1._wp - 1e-8_wp)*pres_inf(i) + 1._wp + end do + + ! Newton-Raphson step + f_pres = -1._wp + df_pres = 0._wp + !$acc loop seq + do i = 1, num_fluids + if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then + rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ & + max(q_cons_vf(i + advxb - 1)%sf(j, k, l), sgm_eps) & + *((pres_relax + pres_inf(i))/(pres_K_init(i) + & + pres_inf(i)))**(1._wp/gamma_min(i)) + f_pres = f_pres + q_cons_vf(i + contxb - 1)%sf(j, k, l)/rho_K_s(i) + df_pres = df_pres - q_cons_vf(i + contxb - 1)%sf(j, k, l) & + /(gamma_min(i)*rho_K_s(i)*(pres_relax + pres_inf(i))) + end if + end do + end if + end do + + ! Update volume fractions + !$acc loop seq + do i = 1, num_fluids + if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) & + q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rho_K_s(i) + end do + + end subroutine s_equilibrate_pressure + + !> Correct internal energies using equilibrated pressure + pure subroutine s_correct_internal_energies(q_cons_vf, j, k, l) + !$acc routine seq + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer, intent(in) :: j, k, l + + real(wp), dimension(num_fluids) :: alpha_rho, alpha + real(wp) :: rho, dyn_pres, gamma, pi_inf, pres_relax, sum_alpha + real(wp), dimension(2) :: Re + integer :: i, q + + !$acc loop seq + do i = 1, num_fluids + alpha_rho(i) = q_cons_vf(i)%sf(j, k, l) + alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l) + end do + + ! Compute mixture properties (combined bubble and standard logic) + rho = 0._wp + gamma = 0._wp + pi_inf = 0._wp + + if (bubbles_euler) then + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + !$acc loop seq + do i = 1, num_fluids + rho = rho + alpha_rho(i) + gamma = gamma + alpha(i)*gammas(i) + pi_inf = pi_inf + alpha(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + !$acc loop seq + do i = 1, num_fluids - 1 + rho = rho + alpha_rho(i) + gamma = gamma + alpha(i)*gammas(i) + pi_inf = pi_inf + alpha(i)*pi_infs(i) + end do + else + rho = alpha_rho(1) + gamma = gammas(1) + pi_inf = pi_infs(1) + end if + else + sum_alpha = 0._wp + if (mpp_lim) then + !$acc loop seq + do i = 1, num_fluids + alpha_rho(i) = max(0._wp, alpha_rho(i)) + alpha(i) = min(max(0._wp, alpha(i)), 1._wp) + sum_alpha = sum_alpha + alpha(i) + end do + alpha = alpha/max(sum_alpha, sgm_eps) + end if + + !$acc loop seq + do i = 1, num_fluids + rho = rho + alpha_rho(i) + gamma = gamma + alpha(i)*gammas(i) + pi_inf = pi_inf + alpha(i)*pi_infs(i) + end do + + if (viscous) then + !$acc loop seq + do i = 1, 2 + Re(i) = dflt_real + if (Re_size(i) > 0) Re(i) = 0._wp + !$acc loop seq + do q = 1, Re_size(i) + Re(i) = alpha(Re_idx(i, q))/Res(i, q) + Re(i) + end do + Re(i) = 1._wp/max(Re(i), sgm_eps) + end do + end if + end if + + ! Compute dynamic pressure and update internal energies + dyn_pres = 0._wp + !$acc loop seq + do i = momxb, momxe + dyn_pres = dyn_pres + 5e-1_wp*q_cons_vf(i)%sf(j, k, l)* & + q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps) + end do + + pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma + + !$acc loop seq + do i = 1, num_fluids + q_cons_vf(i + intxb - 1)%sf(j, k, l) = & + q_cons_vf(i + advxb - 1)%sf(j, k, l)*(gammas(i)*pres_relax + pi_infs(i)) + end do + + end subroutine s_correct_internal_energies + +end module m_pressure_relaxation diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 846f04370a..e3023de18c 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -64,11 +64,12 @@ module m_rhs use m_mhd + use m_pressure_relaxation + implicit none private; public :: s_initialize_rhs_module, & s_compute_rhs, & - s_pressure_relaxation_procedure, & s_finalize_rhs_module !! This variable contains the WENO-reconstructed values of the cell-average @@ -159,12 +160,6 @@ module m_rhs $:DECLARE(create=["qL_rsx_vf","qL_rsy_vf","qL_rsz_vf","qR_rsx_vf","qR_rsy_vf","qR_rsz_vf"]) $:DECLARE(create=["dqL_rsx_vf","dqL_rsy_vf","dqL_rsz_vf","dqR_rsx_vf","dqR_rsy_vf","dqR_rsz_vf"]) - real(wp), allocatable, dimension(:) :: gamma_min, pres_inf - $:DECLARE(create=["gamma_min","pres_inf"]) - - real(wp), allocatable, dimension(:, :) :: Res - $:DECLARE(create=["Res"]) - real(wp), allocatable, dimension(:, :, :) :: nbub !< Bubble number density $:DECLARE(create=["nbub"]) @@ -572,26 +567,7 @@ contains @:ALLOCATE(blkmod1(0:m, 0:n, 0:p), blkmod2(0:m, 0:n, 0:p), alpha1(0:m, 0:n, 0:p), alpha2(0:m, 0:n, 0:p), Kterm(0:m, 0:n, 0:p)) end if - @:ALLOCATE(gamma_min(1:num_fluids), pres_inf(1:num_fluids)) - - do i = 1, num_fluids - gamma_min(i) = 1._wp/fluid_pp(i)%gamma + 1._wp - pres_inf(i) = fluid_pp(i)%pi_inf/(1._wp + fluid_pp(i)%gamma) - end do - $:UPDATE(device=["gamma_min", "pres_inf"]) - - if (viscous) then - @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) - end if - - if (viscous) then - do i = 1, 2 - do j = 1, Re_size(i) - Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) - end do - end do - $:UPDATE(device=["Res","Re_idx","Re_size"]) - end if + call s_initialize_pressure_relaxation_module $:PARALLEL_LOOP(collapse=4) do id = 1, num_dims @@ -1661,250 +1637,6 @@ contains !! fraction of each phase are recomputed. For conservation !! purpose, this pressure is finally corrected using the !! mixture-total-energy equation. - !! @param q_cons_vf Cell-average conservative variables - pure subroutine s_pressure_relaxation_procedure(q_cons_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - - !> @name Relaxed pressure, initial partial pressures, function f(p) and its partial - !! derivative df(p), isentropic partial density, sum of volume fractions, - !! mixture density, dynamic pressure, surface energy, specific heat ratio - !! function, liquid stiffness function (two variations of the last two - !! ones), shear and volume Reynolds numbers and the Weber numbers - !> @{ - real(wp) :: pres_relax - real(wp), dimension(num_fluids) :: pres_K_init - real(wp) :: f_pres - real(wp) :: df_pres - real(wp), dimension(num_fluids) :: rho_K_s - real(wp), dimension(num_fluids) :: alpha_rho - real(wp), dimension(num_fluids) :: alpha - real(wp) :: sum_alpha - real(wp) :: rho - real(wp) :: dyn_pres - real(wp) :: gamma - real(wp) :: pi_inf - real(wp), dimension(2) :: Re - - integer :: i, j, k, l, q, iter !< Generic loop iterators - integer :: relax !< Relaxation procedure determination variable - - $:PARALLEL_LOOP(collapse=3, private=["pres_K_init", "rho_K_s", & - "alpha_rho", "alpha", "Re", "pres_relax"]) - do l = 0, p - do k = 0, n - do j = 0, m - - ! Numerical correction of the volume fractions - if (mpp_lim) then - sum_alpha = 0._wp - - $:LOOP() - do i = 1, num_fluids - if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. & - (q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then - q_cons_vf(i + contxb - 1)%sf(j, k, l) = 0._wp - q_cons_vf(i + advxb - 1)%sf(j, k, l) = 0._wp - q_cons_vf(i + intxb - 1)%sf(j, k, l) = 0._wp - end if - - if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > 1._wp) & - q_cons_vf(i + advxb - 1)%sf(j, k, l) = 1._wp - sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l) - end do - - $:LOOP() - do i = 1, num_fluids - q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha - end do - end if - - ! Pressures relaxation procedure - - ! Is the pressure relaxation procedure necessary? - relax = 1 - - $:LOOP() - do i = 1, num_fluids - if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) relax = 0 - end do - - if (relax == 1) then - ! Initial state - pres_relax = 0._wp - - $:LOOP() - do i = 1, num_fluids - if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then - pres_K_init(i) = & - (q_cons_vf(i + intxb - 1)%sf(j, k, l)/ & - q_cons_vf(i + advxb - 1)%sf(j, k, l) & - - pi_infs(i))/gammas(i) - - if (pres_K_init(i) <= -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp) & - pres_K_init(i) = -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp - else - pres_K_init(i) = 0._wp - end if - pres_relax = pres_relax + q_cons_vf(i + advxb - 1)%sf(j, k, l)*pres_K_init(i) - end do - - ! Iterative process for relaxed pressure determination - f_pres = 1e-9_wp - df_pres = 1e9_wp - - $:LOOP() - do i = 1, num_fluids - rho_K_s(i) = 0._wp - end do - - $:LOOP() - do iter = 0, 49 - - if (abs(f_pres) > 1e-10_wp) then - pres_relax = pres_relax - f_pres/df_pres - - ! Physical pressure - do i = 1, num_fluids - if (pres_relax <= -(1._wp - 1e-8_wp)*pres_inf(i) + 1e-8_wp) & - pres_relax = -(1._wp - 1e-8_wp)*pres_inf(i) + 1._wp - end do - - ! Newton-Raphson method - f_pres = -1._wp - df_pres = 0._wp - - $:LOOP() - do i = 1, num_fluids - if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then - rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ & - max(q_cons_vf(i + advxb - 1)%sf(j, k, l), sgm_eps) & - *((pres_relax + pres_inf(i))/(pres_K_init(i) + & - pres_inf(i)))**(1._wp/gamma_min(i)) - - f_pres = f_pres + q_cons_vf(i + contxb - 1)%sf(j, k, l) & - /rho_K_s(i) - - df_pres = df_pres - q_cons_vf(i + contxb - 1)%sf(j, k, l) & - /(gamma_min(i)*rho_K_s(i)*(pres_relax + pres_inf(i))) - end if - end do - end if - - end do - - ! Cell update of the volume fraction - $:LOOP() - do i = 1, num_fluids - if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) & - q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l) & - /rho_K_s(i) - end do - end if - - ! Mixture-total-energy correction - - ! The mixture-total-energy correction of the mixture pressure P is not necessary here - ! because the primitive variables are directly recovered later on by the conservative - ! variables (see s_convert_conservative_to_primitive_variables called in s_compute_rhs). - ! However, the internal-energy equations should be reset with the corresponding mixture - ! pressure from the correction. This step is carried out below. - - $:LOOP() - do i = 1, num_fluids - alpha_rho(i) = q_cons_vf(i)%sf(j, k, l) - alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l) - end do - - if (bubbles_euler) then - rho = 0._wp - gamma = 0._wp - pi_inf = 0._wp - - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() - do i = 1, num_fluids - rho = rho + alpha_rho(i) - gamma = gamma + alpha(i)*gammas(i) - pi_inf = pi_inf + alpha(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() - do i = 1, num_fluids - 1 - rho = rho + alpha_rho(i) - gamma = gamma + alpha(i)*gammas(i) - pi_inf = pi_inf + alpha(i)*pi_infs(i) - end do - else - rho = alpha_rho(1) - gamma = gammas(1) - pi_inf = pi_infs(1) - end if - else - rho = 0._wp - gamma = 0._wp - pi_inf = 0._wp - - sum_alpha = 0._wp - - if (mpp_lim) then - $:LOOP() - do i = 1, num_fluids - alpha_rho(i) = max(0._wp, alpha_rho(i)) - alpha(i) = min(max(0._wp, alpha(i)), 1._wp) - sum_alpha = sum_alpha + alpha(i) - end do - - alpha = alpha/max(sum_alpha, sgm_eps) - - end if - - $:LOOP() - do i = 1, num_fluids - rho = rho + alpha_rho(i) - gamma = gamma + alpha(i)*gammas(i) - pi_inf = pi_inf + alpha(i)*pi_infs(i) - end do - - if (viscous) then - $:LOOP() - do i = 1, 2 - Re(i) = dflt_real - - if (Re_size(i) > 0) Re(i) = 0._wp - $:LOOP() - do q = 1, Re_size(i) - Re(i) = alpha(Re_idx(i, q))/Res(i, q) & - + Re(i) - end do - - Re(i) = 1._wp/max(Re(i), sgm_eps) - - end do - end if - end if - - dyn_pres = 0._wp - - $:LOOP() - do i = momxb, momxe - dyn_pres = dyn_pres + 5e-1_wp*q_cons_vf(i)%sf(j, k, l)* & - q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps) - end do - - pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma - - $:LOOP() - do i = 1, num_fluids - q_cons_vf(i + intxb - 1)%sf(j, k, l) = & - q_cons_vf(i + advxb - 1)%sf(j, k, l)* & - (gammas(i)*pres_relax + pi_infs(i)) - end do - end do - end do - end do - - end subroutine s_pressure_relaxation_procedure !> The purpose of this subroutine is to WENO-reconstruct the !! left and the right cell-boundary values, including values @@ -2048,6 +1780,8 @@ contains integer :: i, j, l + call s_finalize_pressure_relaxation_module + do j = cont_idx%beg, cont_idx%end if (relativity) then ! Cons and Prim densities are different for relativity diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 042b878c04..b9d2594a05 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -19,6 +19,8 @@ module m_time_steppers use m_rhs !< Right-hane-side (RHS) evaluation procedures + use m_pressure_relaxation !< Pressure relaxation procedures + use m_data_output !< Run-time info & solution data output procedures use m_bubbles_EE !< Ensemble-averaged bubble dynamics routines From 1ed95c0f0984de61af42a66a6231277fd74971c4 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 8 Jun 2025 01:45:01 -0400 Subject: [PATCH 31/75] Create .cursorrules (#866) --- .cursorrules | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 .cursorrules diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000000..a0451d35c7 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,125 @@ +###################################################################### +# .cursorrules – project-level instructions for Cursor AI # +# Target repo: github.com/mflowcode/mfc # +# Format: plain-text "rules for the Agent". One idea per line. # +###################################################################### + +### ─────────────────────────────────────────────────────────────────── +### 1. Global project context (ALWAYS applied) +### ─────────────────────────────────────────────────────────────────── + +- You are interacting with **MFC** – an exascale, many-physics, multi-component + flow solver written in modern Fortran and generated with the Fypp + pre-processor. Most source files use the `.fpp` extension (Fypp templates) + and are transpiled to `.f90` during the CMake build. A minority of modules + remain as plain `.f90`. The code lives in `src/`, tests in `tests/`, + and example input cases in `examples/`. + +- Fypp helper macros live in files under any `include/` directory within + `src/` (e.g., `src//include/`). **Scan these macro files first** so + that template expansions resolve correctly. + +- Always assume free-form Fortran 2008+ (`.fpp` and `.f90`) with `implicit none`, + explicit `intent(in|out|inout)`, and modern constructs (`allocate`, + `move_alloc`, `iso_fortran_env`, etc.). + +- Prefer `module` + `contains` → `subroutine foo()` patterns; avoid COMMON + blocks and `include` files. + +- **Simulation kernels are accelerated exclusively with OpenACC.** Always + include the appropriate `!$acc` pragmas when generating or modifying + performance-critical loops. + +- **The codebase is written in Fortran 2008+ with the Fypp pre-processor.** + Always use the appropriate Fypp macros when generating or modifying + performance-critical loops. + +- **Before suggesting or applying any source-code changes, first read/parse the + full codebase *and* the documentation sources listed below, then produce a + concise description of how the codebase is structured and functions.** + +- Documentation sources: + - Primary online docs: + - Root-level `README.md` in the repository. + +### ─────────────────────────────────────────────────────────────────── +### 2. Style & naming conventions (AUTO-ATTACH for *.fpp and *.f90 files) +### ─────────────────────────────────────────────────────────────────── + +- Indent 2 spaces; continuation lines line up under “&”. +- No maximum line length is enforced. +- Prefer shorter, streamlined code—avoid needless verbosity while keeping clarity. +- Lower-case keywords (`do`, `end subroutine`, etc.). +- Module names: `m_` (e.g., `m_transport`). +- Public procedures: `_` (e.g., `compute_flux`) with prefixe `s_` or `f_` for subroutines and functions, respectively. +- Avoid private helper functions within a subroutine or function, and instead make them private to the module (if appropriate) +- Helper functions or subroutines start with with f_[name here] or s_[name here], respectively +- Avoid passing arguments to subroutines and functions that need to be reshaped in the subroutine or function, such that the compiler can optimize the code. +- Avoid common Fortran code smells, such as: + - Use of `goto` statements (except in legacy interop situations). + - Omission of `intent(...)` declarations. + - Use of global state via `COMMON` blocks or `save` variables. + - Shadowing of variable names (especially loop indices). + - Non-descriptive or single-letter variable names outside small scopes. + - Excessively long helper procedures (>200 lines); prefer decomposition. + - Excessively long subroutines (>500 lines); prefer decomposition. + - Excessively long functions (>100 lines); prefer decomposition. + - Excessively long modules (>1000 lines); prefer decomposition. + - Excessively long files (>1000 lines); prefer decomposition. + - Subroutines and functions that are too long to be understood at a glance. + - Use of `!$acc routine seq` to mark subroutines and functions that are called from OpenACC parallel loops. + - Mixing I/O logic with computation (separate concerns). + - Missing implicit none statements. + - Missing `intent(in|out|inout)` declarations. + - Missing `dimension` or `allocatable`/`pointer` attributes. + - Missing `private` declarations. + - Missing `public` declarations. + - Missing `module procedure` declarations. + - Missing `use` statements. + - Declaring arrays without `dimension` or `allocatable`/`pointer` attributes. + - Use of `stop` for error handling (prefer use of the subroutine `s_mpi_abort` with an appropriate string message argument). + +### ─────────────────────────────────────────────────────────────────── +### 3. Test workflow (AUTOMATICALLY applied when the code is significantly changed and should be tested) +### ─────────────────────────────────────────────────────────────────── + +- Run the test suite after any meaningful code change to verify correctness. +- Ask the user to alter the test suite command so that only the relevant tests are run. +- When you think you are done with the code changes, run ./mfc.sh test -j $(nproc) -f EA8FA07E -t 9E2CA336 +- Otherwise, only run specific tests that are relevant to the code changes. +- Do not run ./mfc.sh test -j $(nproc) without any other arguments (it takes too long to run all tests). + +### ─────────────────────────────────────────────────────────────────── +### 4. OpenACC programming guidelines (AUTOMATICALLY applied to *.fpp/*.f90) +### ─────────────────────────────────────────────────────────────────── + +- **Prefer:** + + !$acc parallel loop gang vector default(present) + + around tight loops over cells or particles; fall back to `!$acc kernels` + only when loop dependencies prevent direct parallelization. Always add + `reduction` clauses where needed. + +- Use `collapse(n)` in `!$acc parallel loop` pragmas to combine nested loops + when it improves parallelism and the iterations are independent. + +- Use `private(var1, var2, ...)` clauses for all variables that are local + to a parallel region or loop, especially scalar temporaries and loop counters. + +- Use `!$acc routine seq` to mark subroutines and functions that are called from OpenACC parallel loops. + +- When adding `!$acc routine seq` put it on the first line after the declaration of the subroutine or function. + +- Allocate large arrays with the `managed` attribute or move them to the device + at program start using a persistent `!$acc enter data` region. + +- Do **not** place `stop` or `error stop` statements inside OpenACC parallel regions or loops, as they are unsupported in device code and will cause runtime failures. + +- Ensure the code compiles with Cray Fortran (`ftn`) and NVIDIA HPC SDK + (`nvfortran`) for OpenACC GPU offloading, and with GNU (`gfortran`) and Intel + (`ifx`/`ifort`) for CPU-only builds where OpenACC directives are ignored. + +###################################################################### +# End of file +###################################################################### From 43b498a3e2bb5162fd334e7aae723265e5721a7e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 8 Jun 2025 09:03:22 -0400 Subject: [PATCH 32/75] fix cursorrules (#867) --- .cursor/rules/mfc-agent-rules.mdc | 87 +++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 .cursor/rules/mfc-agent-rules.mdc diff --git a/.cursor/rules/mfc-agent-rules.mdc b/.cursor/rules/mfc-agent-rules.mdc new file mode 100644 index 0000000000..a323c4cdd5 --- /dev/null +++ b/.cursor/rules/mfc-agent-rules.mdc @@ -0,0 +1,87 @@ +--- +description: +globs: +alwaysApply: true +--- +--- +description: Full MFC project rules – consolidated for Agent Mode +alwaysApply: true +--- + +# 0 Purpose & Scope +Consolidated guidance for the MFC exascale, many-physics solver. +Written primarily for Fortran/Fypp; the OpenACC and style sections matter only when +`.fpp` / `.f90` files are in view. + +--- + +# 1 Global Project Context (always) +- **Project**: *MFC* is modern Fortran 2008+ generated with **Fypp**. + - Sources `src/`, tests `tests/`, examples `examples/`. + - Most sources are `.fpp`; CMake transpiles them to `.f90`. +- **Fypp macros** live in `src//include/` you should scan these first. + `` ∈ {`simulation`,`common`,`pre_process`,`post_process`}. +- Only `simulation` (+ its `common` calls) is GPU-accelerated via **OpenACC**. +- Assume free-form Fortran 2008+, `implicit none`, explicit `intent`, and modern + intrinsics. +- Prefer `module … contains … subroutine foo()`; avoid `COMMON` blocks and + file-level `include` files. +- **Read the full codebase and docs *before* changing code.** + Docs: and the respository root `README.md`. + +### Incremental-change workflow +1. Draft a step-by-step plan. +2. After each step, build: + ```bash + ./mfc.sh build -t pre_process simulation -j $(nproc) + ``` +3. If it compiles, run focused tests: + ```bash + ./mfc.sh test -j $(nproc) -f EA8FA07E -t 9E2CA336 + ``` +4. Roll back & fix if a step fails. + +* Do not run ./mfc.sh test -j $(nproc) without any other arguments (it takes too long to run all tests). + +--- + +# 2 Style & Naming Conventions (for \*.fpp / \*.f90) + +* **Indent 2 spaces**; continuation lines align under `&`. +* Lower-case keywords and intrinsics (`do`, `end subroutine`, …). +* **Modules**: `m_` (e.g. `m_transport`). +* **Public procedures**: + * Subroutine → `s__` (e.g. `s_compute_flux`) + * Function → `f__` +* Private helpers stay in the module; avoid nested procedures. +* **Size limits**: subroutine ≤ 500 lines, helper ≤ 150, function ≤ 100, + module/file ≤ 1000. +* ≤ 6 arguments per routine; otherwise pass a derived-type “params” struct. +* No `goto` (except unavoidable legacy); no global state (`COMMON`, `save`). +* Every variable: `intent(in|out|inout)` + appropriate `dimension` / `allocatable` + / `pointer`. +* Use `s_mpi_abort()` for errors, not `stop`. +* Mark OpenACC-callable helpers that are called from OpenACC parallel loops immediately after declaration: + ```fortran + subroutine s_flux_update(...) + !$acc routine seq + ... + end subroutine + ``` + +--- + +# 3 OpenACC Programming Guidelines (for kernels) + +Wrap tight loops with + +```fortran +!$acc parallel loop gang vector default(present) reduction(...) +``` +* Add `collapse(n)` to merge nested loops when safe. +* Declare loop-local variables with `private(...)`. +* Allocate large arrays with `managed` or move them into a persistent + `!$acc enter data` region at start-up. +* **Do not** place `stop` / `error stop` inside device code. +* Must compile with Cray `ftn` and NVIDIA `nvfortran` for GPU offloading; also build CPU-only with + GNU `gfortran` and Intel `ifx`/`ifort`. From 5d87f90e2f9a0d0d48e0532afdb45fad431c43fb Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 8 Jun 2025 09:04:05 -0400 Subject: [PATCH 33/75] Delete .cursorrules (#868) --- .cursorrules | 125 --------------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 .cursorrules diff --git a/.cursorrules b/.cursorrules deleted file mode 100644 index a0451d35c7..0000000000 --- a/.cursorrules +++ /dev/null @@ -1,125 +0,0 @@ -###################################################################### -# .cursorrules – project-level instructions for Cursor AI # -# Target repo: github.com/mflowcode/mfc # -# Format: plain-text "rules for the Agent". One idea per line. # -###################################################################### - -### ─────────────────────────────────────────────────────────────────── -### 1. Global project context (ALWAYS applied) -### ─────────────────────────────────────────────────────────────────── - -- You are interacting with **MFC** – an exascale, many-physics, multi-component - flow solver written in modern Fortran and generated with the Fypp - pre-processor. Most source files use the `.fpp` extension (Fypp templates) - and are transpiled to `.f90` during the CMake build. A minority of modules - remain as plain `.f90`. The code lives in `src/`, tests in `tests/`, - and example input cases in `examples/`. - -- Fypp helper macros live in files under any `include/` directory within - `src/` (e.g., `src//include/`). **Scan these macro files first** so - that template expansions resolve correctly. - -- Always assume free-form Fortran 2008+ (`.fpp` and `.f90`) with `implicit none`, - explicit `intent(in|out|inout)`, and modern constructs (`allocate`, - `move_alloc`, `iso_fortran_env`, etc.). - -- Prefer `module` + `contains` → `subroutine foo()` patterns; avoid COMMON - blocks and `include` files. - -- **Simulation kernels are accelerated exclusively with OpenACC.** Always - include the appropriate `!$acc` pragmas when generating or modifying - performance-critical loops. - -- **The codebase is written in Fortran 2008+ with the Fypp pre-processor.** - Always use the appropriate Fypp macros when generating or modifying - performance-critical loops. - -- **Before suggesting or applying any source-code changes, first read/parse the - full codebase *and* the documentation sources listed below, then produce a - concise description of how the codebase is structured and functions.** - -- Documentation sources: - - Primary online docs: - - Root-level `README.md` in the repository. - -### ─────────────────────────────────────────────────────────────────── -### 2. Style & naming conventions (AUTO-ATTACH for *.fpp and *.f90 files) -### ─────────────────────────────────────────────────────────────────── - -- Indent 2 spaces; continuation lines line up under “&”. -- No maximum line length is enforced. -- Prefer shorter, streamlined code—avoid needless verbosity while keeping clarity. -- Lower-case keywords (`do`, `end subroutine`, etc.). -- Module names: `m_` (e.g., `m_transport`). -- Public procedures: `_` (e.g., `compute_flux`) with prefixe `s_` or `f_` for subroutines and functions, respectively. -- Avoid private helper functions within a subroutine or function, and instead make them private to the module (if appropriate) -- Helper functions or subroutines start with with f_[name here] or s_[name here], respectively -- Avoid passing arguments to subroutines and functions that need to be reshaped in the subroutine or function, such that the compiler can optimize the code. -- Avoid common Fortran code smells, such as: - - Use of `goto` statements (except in legacy interop situations). - - Omission of `intent(...)` declarations. - - Use of global state via `COMMON` blocks or `save` variables. - - Shadowing of variable names (especially loop indices). - - Non-descriptive or single-letter variable names outside small scopes. - - Excessively long helper procedures (>200 lines); prefer decomposition. - - Excessively long subroutines (>500 lines); prefer decomposition. - - Excessively long functions (>100 lines); prefer decomposition. - - Excessively long modules (>1000 lines); prefer decomposition. - - Excessively long files (>1000 lines); prefer decomposition. - - Subroutines and functions that are too long to be understood at a glance. - - Use of `!$acc routine seq` to mark subroutines and functions that are called from OpenACC parallel loops. - - Mixing I/O logic with computation (separate concerns). - - Missing implicit none statements. - - Missing `intent(in|out|inout)` declarations. - - Missing `dimension` or `allocatable`/`pointer` attributes. - - Missing `private` declarations. - - Missing `public` declarations. - - Missing `module procedure` declarations. - - Missing `use` statements. - - Declaring arrays without `dimension` or `allocatable`/`pointer` attributes. - - Use of `stop` for error handling (prefer use of the subroutine `s_mpi_abort` with an appropriate string message argument). - -### ─────────────────────────────────────────────────────────────────── -### 3. Test workflow (AUTOMATICALLY applied when the code is significantly changed and should be tested) -### ─────────────────────────────────────────────────────────────────── - -- Run the test suite after any meaningful code change to verify correctness. -- Ask the user to alter the test suite command so that only the relevant tests are run. -- When you think you are done with the code changes, run ./mfc.sh test -j $(nproc) -f EA8FA07E -t 9E2CA336 -- Otherwise, only run specific tests that are relevant to the code changes. -- Do not run ./mfc.sh test -j $(nproc) without any other arguments (it takes too long to run all tests). - -### ─────────────────────────────────────────────────────────────────── -### 4. OpenACC programming guidelines (AUTOMATICALLY applied to *.fpp/*.f90) -### ─────────────────────────────────────────────────────────────────── - -- **Prefer:** - - !$acc parallel loop gang vector default(present) - - around tight loops over cells or particles; fall back to `!$acc kernels` - only when loop dependencies prevent direct parallelization. Always add - `reduction` clauses where needed. - -- Use `collapse(n)` in `!$acc parallel loop` pragmas to combine nested loops - when it improves parallelism and the iterations are independent. - -- Use `private(var1, var2, ...)` clauses for all variables that are local - to a parallel region or loop, especially scalar temporaries and loop counters. - -- Use `!$acc routine seq` to mark subroutines and functions that are called from OpenACC parallel loops. - -- When adding `!$acc routine seq` put it on the first line after the declaration of the subroutine or function. - -- Allocate large arrays with the `managed` attribute or move them to the device - at program start using a persistent `!$acc enter data` region. - -- Do **not** place `stop` or `error stop` statements inside OpenACC parallel regions or loops, as they are unsupported in device code and will cause runtime failures. - -- Ensure the code compiles with Cray Fortran (`ftn`) and NVIDIA HPC SDK - (`nvfortran`) for OpenACC GPU offloading, and with GNU (`gfortran`) and Intel - (`ifx`/`ifort`) for CPU-only builds where OpenACC directives are ignored. - -###################################################################### -# End of file -###################################################################### From 28d97009d7c9cb0a4334eb37e98628fabb7274e1 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 8 Jun 2025 09:59:50 -0400 Subject: [PATCH 34/75] Update mfc-agent-rules.mdc (#869) --- .cursor/rules/mfc-agent-rules.mdc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.cursor/rules/mfc-agent-rules.mdc b/.cursor/rules/mfc-agent-rules.mdc index a323c4cdd5..02fe5d7e0e 100644 --- a/.cursor/rules/mfc-agent-rules.mdc +++ b/.cursor/rules/mfc-agent-rules.mdc @@ -1,9 +1,4 @@ --- -description: -globs: -alwaysApply: true ---- ---- description: Full MFC project rules – consolidated for Agent Mode alwaysApply: true --- From 46d55d81e29b0de49ac6d717e7ea12699298131f Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 8 Jun 2025 23:23:15 -0400 Subject: [PATCH 35/75] Make CBC not so terrible (#870) --- src/simulation/m_compute_cbc.fpp | 300 ++++++++++++------------------- 1 file changed, 112 insertions(+), 188 deletions(-) diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 421f4ab07a..5880361461 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -1,12 +1,10 @@ #:include 'directive_macros.fpp' !> !! @file m_compute_cbc.f90 -!! @brief Contains module m_compute_cbc +!! @brief CBC computation module module m_compute_cbc - - use m_global_parameters !< Definitions of the global parameters - + use m_global_parameters implicit none private; public :: s_compute_slip_wall_L, & @@ -19,11 +17,72 @@ module m_compute_cbc s_compute_supersonic_outflow_L contains + !> Base L1 calculation + pure function f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) result(L1) + !$acc routine seq + real(wp), dimension(3), intent(in) :: lambda + real(wp), intent(in) :: rho, c, dpres_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp) :: L1 + L1 = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) + end function f_base_L1 + + !> Fill density L variables + pure subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) + !$acc routine seq + real(wp), dimension(sys_size), intent(inout) :: L + real(wp), intent(in) :: lambda_factor, lambda2, c + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), intent(in) :: dpres_ds + integer :: i + + do i = 2, momxb + L(i) = lambda_factor*lambda2*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) + end do + end subroutine s_fill_density_L + + !> Fill velocity L variables + pure subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) + !$acc routine seq + real(wp), dimension(sys_size), intent(inout) :: L + real(wp), intent(in) :: lambda_factor, lambda2 + real(wp), dimension(num_dims), intent(in) :: dvel_ds + integer :: i + + do i = momxb + 1, momxe + L(i) = lambda_factor*lambda2*dvel_ds(dir_idx(i - contxe)) + end do + end subroutine s_fill_velocity_L + + !> Fill advection L variables + pure subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) + !$acc routine seq + real(wp), dimension(sys_size), intent(inout) :: L + real(wp), intent(in) :: lambda_factor, lambda2 + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + integer :: i + + do i = E_idx, advxe - 1 + L(i) = lambda_factor*lambda2*dadv_ds(i - momxe) + end do + end subroutine s_fill_advection_L + + !> Fill chemistry L variables + pure subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) + !$acc routine seq + real(wp), dimension(sys_size), intent(inout) :: L + real(wp), intent(in) :: lambda_factor, lambda2 + real(wp), dimension(num_species), intent(in) :: dYs_ds + integer :: i + + if (.not. chemistry) return + + do i = chemxb, chemxe + L(i) = lambda_factor*lambda2*dYs_ds(i - chemxb + 1) + end do + end subroutine s_fill_chemistry_L - !> The L variables for the slip wall CBC, see pg. 451 of - !! Thompson (1990). At the slip wall (frictionless wall), - !! the normal component of velocity is zero at all times, - !! while the transverse velocities may be nonzero. + !> Slip wall CBC (Thompson 1990, pg. 451) pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L @@ -32,26 +91,16 @@ contains #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c - real(wp), intent(in) :: dpres_ds + real(wp), intent(in) :: rho, c, dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds - integer :: i - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, advxe - L(i) = 0._wp - end do - + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + L(2:advxe - 1) = 0._wp L(advxe) = L(1) - end subroutine s_compute_slip_wall_L - !> The L variables for the nonreflecting subsonic buffer CBC - !! see pg. 13 of Thompson (1987). The nonreflecting subsonic - !! buffer reduces the amplitude of any reflections caused by - !! outgoing waves. + !> Nonreflecting subsonic buffer CBC (Thompson 1987, pg. 13) pure subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L @@ -66,42 +115,22 @@ contains real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds + real(wp) :: lambda_factor - integer :: i !< Generic loop iterator + lambda_factor = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(1))) + L(1) = lambda_factor*lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - L(1) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(1)))*lambda(1) & - *(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, momxb - L(i) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(2)))*lambda(2) & - *(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) - end do - - do i = momxb + 1, momxe - L(i) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(2)))*lambda(2) & - *(dvel_ds(dir_idx(i - contxe))) - end do - - do i = E_idx, advxe - 1 - L(i) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(2)))*lambda(2) & - *(dadv_ds(i - momxe)) - end do - - L(advxe) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(3)))*lambda(3) & - *(dpres_ds + rho*c*dvel_ds(dir_idx(1))) - - if (chemistry) then - do i = chemxb, chemxe - L(i) = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(2)))*lambda(2) & - *(dYs_ds(i - chemxb + 1)) - end do - end if + lambda_factor = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(2))) + call s_fill_density_L(L, lambda_factor, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) + call s_fill_velocity_L(L, lambda_factor, lambda(2), dvel_ds) + call s_fill_advection_L(L, lambda_factor, lambda(2), dadv_ds) + call s_fill_chemistry_L(L, lambda_factor, lambda(2), dYs_ds) + lambda_factor = (5e-1_wp - 5e-1_wp*sign(1._wp, lambda(3))) + L(advxe) = lambda_factor*lambda(3)*(dpres_ds + rho*c*dvel_ds(dir_idx(1))) end subroutine s_compute_nonreflecting_subsonic_buffer_L - !> The L variables for the nonreflecting subsonic inflow CBC - !! see pg. 455, Thompson (1990). This nonreflecting subsonic - !! CBC assumes an incoming flow and reduces the amplitude of - !! any reflections caused by outgoing waves. + + !> Nonreflecting subsonic inflow CBC (Thompson 1990, pg. 455) pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L @@ -110,30 +139,15 @@ contains #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c - real(wp), intent(in) :: dpres_ds + real(wp), intent(in) :: rho, c, dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds - integer :: i - - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, advxe - L(i) = 0._wp - end do - - if (chemistry) then - do i = chemxb, chemxe - L(i) = 0._wp - end do - end if - + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + L(2:advxe) = 0._wp + if (chemistry) L(chemxb:chemxe) = 0._wp end subroutine s_compute_nonreflecting_subsonic_inflow_L - !> The L variables for the nonreflecting subsonic outflow - !! CBC see pg. 454 of Thompson (1990). This nonreflecting - !! subsonic CBC presumes an outgoing flow and reduces the - !! amplitude of any reflections caused by outgoing waves. + !> Nonreflecting subsonic outflow CBC (Thompson 1990, pg. 454) pure subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L @@ -149,40 +163,15 @@ contains real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds - integer :: i !> Generic loop iterator - - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, momxb - L(i) = lambda(2)*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) - end do - - do i = momxb + 1, momxe - L(i) = lambda(2)*(dvel_ds(dir_idx(i - contxe))) - end do - - do i = E_idx, advxe - 1 - L(i) = lambda(2)*(dadv_ds(i - momxe)) - end do - - ! bubble index + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) + call s_fill_velocity_L(L, 1._wp, lambda(2), dvel_ds) + call s_fill_advection_L(L, 1._wp, lambda(2), dadv_ds) + call s_fill_chemistry_L(L, 1._wp, lambda(2), dYs_ds) L(advxe) = 0._wp - - if (chemistry) then - do i = chemxb, chemxe - L(i) = lambda(2)*dYs_ds(i - chemxb + 1) - end do - end if - end subroutine s_compute_nonreflecting_subsonic_outflow_L - !> The L variables for the force-free subsonic outflow CBC, - !! see pg. 454 of Thompson (1990). The force-free subsonic - !! outflow sets to zero the sum of all of the forces which - !! are acting on a fluid element for the normal coordinate - !! direction to the boundary. As a result, a fluid element - !! at the boundary is simply advected outward at the fluid - !! velocity. + !> Force-free subsonic outflow CBC (Thompson 1990, pg. 454) pure subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L @@ -197,30 +186,14 @@ contains real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds - integer :: i !> Generic loop iterator - - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, momxb - L(i) = lambda(2)*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) - end do - - do i = momxb + 1, momxe - L(i) = lambda(2)*(dvel_ds(dir_idx(i - contxe))) - end do - - do i = E_idx, advxe - 1 - L(i) = lambda(2)*(dadv_ds(i - momxe)) - end do - + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) + call s_fill_velocity_L(L, 1._wp, lambda(2), dvel_ds) + call s_fill_advection_L(L, 1._wp, lambda(2), dadv_ds) L(advxe) = L(1) + 2._wp*rho*c*lambda(2)*dvel_ds(dir_idx(1)) - end subroutine s_compute_force_free_subsonic_outflow_L - !> L variables for the constant pressure subsonic outflow - !! CBC see pg. 455 Thompson (1990). The constant pressure - !! subsonic outflow maintains a fixed pressure at the CBC - !! boundary in absence of any transverse effects. + !> Constant pressure subsonic outflow CBC (Thompson 1990, pg. 455) pure subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L @@ -235,31 +208,14 @@ contains real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds - integer :: i !> Generic loop iterator - - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, momxb - L(i) = lambda(2)*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) - end do - - do i = momxb + 1, momxe - L(i) = lambda(2)*(dvel_ds(dir_idx(i - contxe))) - end do - - do i = E_idx, advxe - 1 - L(i) = lambda(2)*(dadv_ds(i - momxe)) - end do - + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) + call s_fill_velocity_L(L, 1._wp, lambda(2), dvel_ds) + call s_fill_advection_L(L, 1._wp, lambda(2), dadv_ds) L(advxe) = -L(1) - end subroutine s_compute_constant_pressure_subsonic_outflow_L - !> L variables for the supersonic inflow CBC, see pg. 453 - !! Thompson (1990). The supersonic inflow CBC is a steady - !! state, or nearly a steady state, CBC in which only the - !! transverse terms may generate a time dependence at the - !! inflow boundary. + !> Supersonic inflow CBC (Thompson 1990, pg. 453) pure subroutine s_compute_supersonic_inflow_L(L) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L @@ -267,25 +223,11 @@ contains $:ROUTINE() #endif real(wp), dimension(sys_size), intent(inout) :: L - - integer :: i - - do i = 1, advxe - L(i) = 0._wp - end do - - if (chemistry) then - do i = chemxb, chemxe - L(i) = 0._wp - end do - end if - + L(1:advxe) = 0._wp + if (chemistry) L(chemxb:chemxe) = 0._wp end subroutine s_compute_supersonic_inflow_L - !> L variables for the supersonic outflow CBC, see pg. 453 - !! of Thompson (1990). For the supersonic outflow CBC, the - !! flow evolution at the boundary is determined completely - !! by the interior data. + !> Supersonic outflow CBC (Thompson 1990, pg. 453) pure subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L @@ -300,30 +242,12 @@ contains real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds - integer :: i !< Generic loop iterator - - L(1) = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) - - do i = 2, momxb - L(i) = lambda(2)*(c*c*dalpha_rho_ds(i - 1) - mf(i - 1)*dpres_ds) - end do - - do i = momxb + 1, momxe - L(i) = lambda(2)*(dvel_ds(dir_idx(i - contxe))) - end do - - do i = E_idx, advxe - 1 - L(i) = lambda(2)*(dadv_ds(i - momxe)) - end do + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) + call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) + call s_fill_velocity_L(L, 1._wp, lambda(2), dvel_ds) + call s_fill_advection_L(L, 1._wp, lambda(2), dadv_ds) + call s_fill_chemistry_L(L, 1._wp, lambda(2), dYs_ds) L(advxe) = lambda(3)*(dpres_ds + rho*c*dvel_ds(dir_idx(1))) - - if (chemistry) then - do i = chemxb, chemxe - L(i) = lambda(2)*dYs_ds(i - chemxb + 1) - end do - end if - end subroutine s_compute_supersonic_outflow_L - end module m_compute_cbc From 1f3b8f0f41456ce5375779c2ebdbc2f99fc2dcb9 Mon Sep 17 00:00:00 2001 From: "Mohammed S. Al-Mahrouqi" <145478595+mohdsaid497566@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:38:07 -0400 Subject: [PATCH 36/75] changed output file from default (1) to run_time.inf (3) (#504) (#877) Co-authored-by: mohdsaid497566 --- src/simulation/m_data_output.fpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index fdf81d528a..7407375f86 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -151,18 +151,18 @@ contains ! Generating table header for the stability criteria to be outputted if (cfl_dt) then if (viscous) then - write (1, '(A)') ' Time-steps dt = Time ICFL '// & + write (3, '(A)') ' Time-steps dt = Time ICFL '// & 'Max VCFL Max Rc Min =' else - write (1, '(A)') ' Time-steps dt Time '// & + write (3, '(A)') ' Time-steps dt Time '// & ' ICFL Max ' end if else if (viscous) then - write (1, '(A)') ' Time-steps Time ICFL '// & + write (3, '(A)') ' Time-steps Time ICFL '// & 'Max VCFL Max Rc Min ' else - write (1, '(A)') ' Time-steps Time '// & + write (3, '(A)') ' Time-steps Time '// & ' ICFL Max ' end if end if @@ -353,12 +353,12 @@ contains ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then if (viscous) then - write (1, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & + write (3, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & t_step, dt, t_step*dt, icfl_max_glb, & vcfl_max_glb, & Rc_min_glb else - write (1, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & + write (3, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & t_step, dt, t_step*dt, icfl_max_glb end if From bea95b5bec2854fdeeae8f6462a68092c0943523 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 16:29:39 -0400 Subject: [PATCH 37/75] Fixed issues with macro --- src/common/m_finite_differences.fpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 80db1d8c79..18a0a84484 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,7 +18,8 @@ contains integer :: x, y, z !< Generic loop iterators real(wp) :: divergence - $:PARALLEL_LOOP(collapse=3, gang vector default(present) private=["divergence"]) + + $:PARALLEL_LOOP(collapse=3, private=["divergence"]) do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end From c6d4a0465333ed4d30675b5dbf2137711ecb83be Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 17:19:49 -0400 Subject: [PATCH 38/75] Replaced Openacc routine directives --- src/simulation/m_qbmm.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 72a0114865..dbc78a69d1 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -865,7 +865,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - !$acc routine seq + $:ROUTINE() #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff From a6dac4df28f4cd2e6d4a796228120f5ba474f0ca Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 17:26:30 -0400 Subject: [PATCH 39/75] Added update device macros --- src/simulation/m_viscous.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 2cc8797e27..0ea46cdd90 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1000,7 +1000,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) if (n > 0) then if (p > 0) then @@ -1097,7 +1097,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) if (n > 0) then if (p > 0) then @@ -1200,7 +1200,7 @@ contains is3_viscous = iz iv = iv_in - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) ! First-Order Spatial Derivatives in x-direction if (norm_dir == 1) then From d1b0e73611635c59dc07e4f6914ed4fb53cc0e74 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 11 Jun 2025 17:30:59 -0400 Subject: [PATCH 40/75] Replaced rest of parallel loop directives --- src/simulation/m_viscous.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 0ea46cdd90..2cc8797e27 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1000,7 +1000,7 @@ contains end if - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) if (n > 0) then if (p > 0) then @@ -1097,7 +1097,7 @@ contains end if - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) if (n > 0) then if (p > 0) then @@ -1200,7 +1200,7 @@ contains is3_viscous = iz iv = iv_in - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous","iv"]) + !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) ! First-Order Spatial Derivatives in x-direction if (norm_dir == 1) then From 0d2d6ac2bdf6fcef877c046b1d2fd9f8df7881a0 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 19:00:15 -0400 Subject: [PATCH 41/75] Renamed directives to have GPU_ beginning --- src/common/include/directive_macros.fpp | 22 +- src/common/m_boundary_common.fpp | 48 +-- src/common/m_chemistry.fpp | 12 +- src/common/m_finite_differences.fpp | 2 +- src/common/m_helper.fpp | 4 +- src/common/m_helper_basic.fpp | 6 +- src/common/m_mpi_common.fpp | 66 ++-- src/common/m_phase_change.fpp | 34 +- src/common/m_variables_conversion.fpp | 98 +++--- src/pre_process/m_assign_variables.fpp | 4 +- src/pre_process/m_patches.fpp | 8 +- src/simulation/include/inline_riemann.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 54 +-- src/simulation/m_body_forces.fpp | 14 +- src/simulation/m_boundary_conditions.fpp | 8 +- src/simulation/m_bubbles.fpp | 38 +-- src/simulation/m_bubbles_EE.fpp | 44 +-- src/simulation/m_bubbles_EL.fpp | 96 +++--- src/simulation/m_bubbles_EL_kernels.fpp | 16 +- src/simulation/m_cbc.fpp | 180 +++++----- src/simulation/m_compute_cbc.fpp | 16 +- src/simulation/m_data_output.fpp | 14 +- src/simulation/m_fftw.fpp | 26 +- src/simulation/m_global_parameters.fpp | 152 ++++----- src/simulation/m_hyperelastic.fpp | 36 +- src/simulation/m_hypoelastic.fpp | 50 +-- src/simulation/m_ibm.fpp | 52 +-- src/simulation/m_mhd.fpp | 18 +- src/simulation/m_mpi_proxy.fpp | 74 ++-- src/simulation/m_qbmm.fpp | 58 ++-- src/simulation/m_rhs.fpp | 144 ++++---- src/simulation/m_riemann_solvers.fpp | 390 +++++++++++----------- src/simulation/m_sim_helpers.fpp | 18 +- src/simulation/m_start_up.fpp | 52 +-- src/simulation/m_surface_tension.fpp | 28 +- src/simulation/m_time_steppers.fpp | 48 +-- src/simulation/m_viscous.fpp | 208 ++++++------ src/simulation/m_weno.fpp | 50 +-- 38 files changed, 1096 insertions(+), 1096 deletions(-) diff --git a/src/common/include/directive_macros.fpp b/src/common/include/directive_macros.fpp index 4e6e62f746..0ca82b305c 100644 --- a/src/common/include/directive_macros.fpp +++ b/src/common/include/directive_macros.fpp @@ -171,7 +171,7 @@ #:enddef -#:def PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & +#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) @@ -215,7 +215,7 @@ $:acc_directive #:enddef -#:def ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) +#:def GPU_ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:assert isinstance(nohost, bool) @@ -233,7 +233,7 @@ $:acc_directive #:enddef -#:def DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) +#:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) #:set copy_val = GEN_COPY_STR(copy) @@ -259,7 +259,7 @@ $:acc_directive #:enddef -#:def LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) +#:def GPU_LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) #:set collapse_val = GEN_COLLAPSE_STR(collapse) @@ -287,7 +287,7 @@ $:acc_directive #:enddef -#:def DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) +#:def GPU_DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) #:set copy_val = GEN_COPY_STR(copy) @@ -318,7 +318,7 @@ $:acc_directive #:enddef -#:def ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) +#:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') #:set create_val = GEN_CREATE_STR(create) @@ -332,7 +332,7 @@ $:acc_directive #:enddef -#:def EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) +#:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) #:set copyout_val = GEN_COPYOUT_STR(copyout) #:set delete_val = GEN_DELETE_STR(delete) @@ -346,7 +346,7 @@ $:acc_directive #:enddef -#:def CACHE(cache, extraAccArgs=None) +#:def GPU_CACHE(cache, extraAccArgs=None) #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) @@ -356,7 +356,7 @@ $:acc_directive #:enddef -#:def ATOMIC(atomic='update', extraAccArgs=None) +#:def GPU_ATOMIC(atomic='update', extraAccArgs=None) #:assert isinstance(atomic, str) #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') @@ -369,7 +369,7 @@ $:acc_directive #:enddef -#:def UPDATE(host=None, device=None, extraAccArgs=None) +#:def GPU_UPDATE(host=None, device=None, extraAccArgs=None) #:set host_val = GEN_HOST_STR(host) #:set device_val = GEN_DEVICE_STR(device) @@ -381,7 +381,7 @@ $:acc_directive #:enddef -#:def WAIT(host=None, device=None, extraAccArgs=None) +#:def GPU_WAIT(host=None, device=None, extraAccArgs=None) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) #:set clause_val = host_val.strip('\n') + device_val.strip('\n') diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index d9f6094600..5f9f8fa16e 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -21,7 +21,7 @@ module m_boundary_common implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - $:DECLARE(create=["bc_buffers"]) + $:GPU_DECLARE(create=["bc_buffers"]) real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze @@ -84,7 +84,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) @@ -108,7 +108,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) @@ -136,7 +136,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) @@ -162,7 +162,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) @@ -190,7 +190,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) @@ -214,7 +214,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) @@ -242,7 +242,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -311,7 +311,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_symmetry #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -575,7 +575,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_periodic #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -718,7 +718,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_axis #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -781,7 +781,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -880,7 +880,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1015,7 +1015,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1084,7 +1084,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_qbmm_extrapolation #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc @@ -1167,7 +1167,7 @@ contains if (bcxb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1185,7 +1185,7 @@ contains if (bcxe >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1206,7 +1206,7 @@ contains if (bcyb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1224,7 +1224,7 @@ contains if (bcye >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1245,7 +1245,7 @@ contains if (bczb >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1263,7 +1263,7 @@ contains if (bcze >= 0) then call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) else - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) @@ -1283,7 +1283,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_periodic #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1341,7 +1341,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_reflective #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1423,7 +1423,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 14b3c3d1a6..4984a2248f 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -36,7 +36,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:LOOP() + $:GPU_LOOP() do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = & q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) @@ -47,7 +47,7 @@ contains ! cons. contxb = \rho (1-fluid model) ! cons. momxb + i = \rho u_i energy = q_cons_vf(E_idx)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) - $:LOOP() + $:GPU_LOOP() do eqn = momxb, momxe energy = energy - & 0.5_wp*(q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z))**2._wp @@ -73,7 +73,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_vf(i)%sf(x, y, z) end do @@ -100,12 +100,12 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:PARALLEL_LOOP(collapse=3, private=["Ys", "omega"]) + $:GPU_PARALLEL_LOOP(collapse=3, private=["Ys", "omega"]) do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:LOOP() + $:GPU_LOOP() do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) end do @@ -115,7 +115,7 @@ contains call get_net_production_rates(rho, T, Ys, omega) - $:LOOP() + $:GPU_LOOP() do eqn = chemxb, chemxe omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 18a0a84484..8119c1257a 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -19,7 +19,7 @@ contains real(wp) :: divergence - $:PARALLEL_LOOP(collapse=3, private=["divergence"]) + $:GPU_PARALLEL_LOOP(collapse=3, private=["divergence"]) do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index a73afddda0..cbcabc0a3f 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -44,7 +44,7 @@ contains !! @param Rtmp is the bubble radii !! @param ntmp is the output number bubble density pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: Rtmp real(wp), intent(out) :: ntmp @@ -58,7 +58,7 @@ contains end subroutine s_comp_n_from_prim pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: nRtmp real(wp), intent(out) :: ntmp diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index d0ace9e551..b6afc36416 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -24,7 +24,7 @@ contains !! @param tol_input Relative error (default = 1e-6_wp). !! @return Result of the comparison. logical pure elemental function f_approx_equal(a, b, tol_input) result(res) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: a, b real(wp), optional, intent(in) :: tol_input real(wp) :: tol @@ -47,7 +47,7 @@ contains !> Checks if a real(wp) variable is of default value. !! @param var Variable to check. logical pure elemental function f_is_default(var) result(res) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: var res = f_approx_equal(var, dflt_real) @@ -72,7 +72,7 @@ contains !> Checks if a real(wp) variable is an integer. !! @param var Variable to check. logical pure elemental function f_is_integer(var) result(res) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: var res = f_approx_equal(var, real(nint(var), wp)) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 1fdc19aa0d..6ea2bc60f2 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -26,7 +26,7 @@ module m_mpi_common implicit none integer, private :: err_code, ierr, v_size !< - $:DECLARE(create=["v_size"]) + $:GPU_DECLARE(create=["v_size"]) !! Generic flags used to identify and report MPI errors real(wp), private, allocatable, dimension(:), target :: buff_send !< @@ -39,10 +39,10 @@ module m_mpi_common !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. - $:DECLARE(create=["buff_send","buff_recv"]) + $:GPU_DECLARE(create=["buff_send","buff_recv"]) integer :: halo_size, nVars - $:DECLARE(create=["halo_size","nVars"]) + $:GPU_DECLARE(create=["halo_size","nVars"]) contains @@ -639,7 +639,7 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") - $:UPDATE(device=["v_size"]) + $:GPU_UPDATE(device=["v_size"]) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then @@ -693,7 +693,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -707,7 +707,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -722,7 +722,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -739,7 +739,7 @@ contains end if #endif #:elif mpi_dir == 2 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, p do k = 0, buff_size - 1 @@ -755,7 +755,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -771,7 +771,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -789,7 +789,7 @@ contains end if #endif #:else - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -805,7 +805,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -821,7 +821,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -856,7 +856,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - $:UPDATE(host=["buff_send"]) + $:GPU_UPDATE(host=["buff_send"]) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -874,7 +874,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - $:UPDATE(device=["buff_recv"]) + $:GPU_UPDATE(device=["buff_recv"]) call nvtxEndRange #:endif end if @@ -891,7 +891,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -912,7 +912,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -927,7 +927,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -944,7 +944,7 @@ contains end if #endif #:elif mpi_dir == 2 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = 0, p do k = -buff_size, -1 @@ -966,7 +966,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -982,7 +982,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -1001,7 +1001,7 @@ contains #endif #:else ! Unpacking buffer from bc_z%beg - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, sys_size do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1024,7 +1024,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1041,7 +1041,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=5,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1089,7 +1089,7 @@ contains #ifdef MFC_MPI nVars = num_dims + 1 - $:UPDATE(device=["nVars"]) + $:GPU_UPDATE(device=["nVars"]) buffer_counts = (/ & buff_size*nVars*(n + 1)*(p + 1), & @@ -1131,7 +1131,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -1144,7 +1144,7 @@ contains end do #:elif mpi_dir == 2 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, p do k = 0, buff_size - 1 @@ -1159,7 +1159,7 @@ contains end do #:else - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -1187,7 +1187,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - $:UPDATE(host=["buff_send"]) + $:GPU_UPDATE(host=["buff_send"]) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -1205,7 +1205,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - $:UPDATE(device=["buff_recv"]) + $:GPU_UPDATE(device=["buff_recv"]) call nvtxEndRange #:endif end if @@ -1221,7 +1221,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1241,7 +1241,7 @@ contains end do #:elif mpi_dir == 2 - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = 0, p do k = -buff_size, -1 @@ -1263,7 +1263,7 @@ contains #:else ! Unpacking buffer from bc_z%beg - $:PARALLEL_LOOP(collapse=4,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) do i = 1, nVars do l = -buff_size, -1 do k = -buff_size, n + buff_size diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 5bbb8c4e8c..15f11be298 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -41,7 +41,7 @@ module m_phase_change real(wp) :: A, B, C, D !> @} - $:DECLARE(create=["max_iter","pCr","TCr","mixM","lp","vp","A","B","C","D"]) + $:GPU_DECLARE(create=["max_iter","pCr","TCr","mixM","lp","vp","A","B","C","D"]) contains @@ -88,17 +88,17 @@ contains real(wp) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses real(wp) :: TvF !< total volume fraction - $:DECLARE(create=["pS","pSOV","pSSL","TS","TSOV","TSSL","TSatOV","TSatSL"]) - $:DECLARE(create=["rhoe","dynE","rhos","rho","rM","m1","m2","MCT","TvF"]) + $:GPU_DECLARE(create=["pS","pSOV","pSSL","TS","TSOV","TSSL","TSatOV","TSatSL"]) + $:GPU_DECLARE(create=["rhoe","dynE","rhos","rho","rM","m1","m2","MCT","TvF"]) real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok - $:DECLARE(create=["p_infOV","p_infpT","p_infSL","sk","hk","gk","ek","rhok"]) + $:GPU_DECLARE(create=["p_infOV","p_infpT","p_infSL","sk","hk","gk","ek","rhok"]) !< Generic loop iterators integer :: i, j, k, l ! starting equilibrium solver - $:PARALLEL_LOOP(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) @@ -107,7 +107,7 @@ contains do l = 0, p rho = 0.0_wp; TvF = 0.0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! Mixture density @@ -133,7 +133,7 @@ contains ! kinetic energy as an auxiliary variable to the calculation of the total internal energy dynE = 0.0_wp - $:LOOP() + $:GPU_LOOP() do i = momxb, momxe dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho @@ -255,7 +255,7 @@ contains ! calculating volume fractions, internal energies, and total entropy rhos = 0.0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! volume fractions @@ -290,7 +290,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k #else - $:ROUTINE() + $:GPU_ROUTINE() #endif ! initializing variables @@ -307,7 +307,7 @@ contains ! auxiliary variables for the pT-equilibrium solver mCP = 0.0_wp; mQ = 0.0_wp; p_infpT = ps_inf; ! Performing tests before initializing the pT-equilibrium - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system @@ -356,7 +356,7 @@ contains ! updating functions used in the Newton's solver gpp = 0.0_wp; gp = 0.0_wp; hp = 0.0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids gp = gp + (gs_min(i) - 1.0_wp)*q_cons_vf(i + contxb - 1)%sf(j, k, l)*cvs(i) & @@ -394,7 +394,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, intent(in) :: j, k, l @@ -450,7 +450,7 @@ contains mCP = 0.0_wp; mCPD = 0.0_wp; mCVGP = 0.0_wp; mCVGP2 = 0.0_wp; mQ = 0.0_wp; mQD = 0.0_wp ! Those must be updated through the iterations, as they either depend on ! the partial masses for all fluids, or on the equilibrium pressure - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system @@ -519,7 +519,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_correct_partial_densities #else - $:ROUTINE() + $:GPU_ROUTINE() #endif !> @name variables for the correction of the reacting partial densities @@ -582,7 +582,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_jacobian_matrix #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(2, 2), intent(out) :: InvJac @@ -689,7 +689,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pTg_residue #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, intent(in) :: j, k, l @@ -740,7 +740,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_TSat #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: pSat diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index a23b5ff2ae..f7d5e9075a 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -51,16 +51,16 @@ module m_variables_conversion !! In simulation, gammas, pi_infs, and qvs are already declared in m_global_variables #ifndef MFC_SIMULATION real(wp), allocatable, public, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - $:DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) + $:GPU_DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) #endif real(wp), allocatable, dimension(:) :: Gs integer, allocatable, dimension(:) :: bubrs real(wp), allocatable, dimension(:, :) :: Res - $:DECLARE(create=["bubrs","Gs","Res"]) + $:GPU_DECLARE(create=["bubrs","Gs","Res"]) integer :: is1b, is2b, is3b, is1e, is2e, is3e - $:DECLARE(create=["is1b","is2b","is3b","is1e","is2e","is3e"]) + $:GPU_DECLARE(create=["is1b","is2b","is3b","is1e","is2e","is3e"]) real(wp), allocatable, dimension(:, :, :), public :: rho_sf !< Scalar density function real(wp), allocatable, dimension(:, :, :), public :: gamma_sf !< Scalar sp. heat ratio function @@ -121,7 +121,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pressure #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: energy, alf @@ -462,7 +462,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -543,7 +543,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -643,7 +643,7 @@ contains qvs(i) = fluid_pp(i)%qv qvps(i) = fluid_pp(i)%qvp end do - $:UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) + $:GPU_UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) #ifdef MFC_SIMULATION @@ -655,7 +655,7 @@ contains end do end do - $:UPDATE(device=["Res","Re_idx","Re_size"]) + $:GPU_UPDATE(device=["Res","Re_idx","Re_size"]) end if #endif @@ -669,7 +669,7 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - $:UPDATE(device=["bubrs"]) + $:GPU_UPDATE(device=["bubrs"]) end if #ifdef MFC_POST_PROCESS @@ -749,7 +749,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - $:LOOP() + $:GPU_LOOP() do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -782,7 +782,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - $:LOOP() + $:GPU_LOOP() do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -870,7 +870,7 @@ contains end if #:endif - $:PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & "dyn_pres_K", "rhoYks", "B"]) do l = ibounds(3)%beg, ibounds(3)%end @@ -878,7 +878,7 @@ contains do j = ibounds(1)%beg, ibounds(1)%end dyn_pres_K = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -922,13 +922,13 @@ contains B2 = B(1)**2 + B(2)**2 + B(3)**2 m2 = 0._wp - $:LOOP() + $:GPU_LOOP() do i = momxb, momxe m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 end do S = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, 3 S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) end do @@ -936,14 +936,14 @@ contains E = qK_cons_vf(E_idx)%sf(j, k, l) D = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, contxe D = D + qK_cons_vf(i)%sf(j, k, l) end do ! Newton-Raphson W = E + D - $:LOOP() + $:GPU_LOOP() do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS @@ -969,13 +969,13 @@ contains qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Recover the other primitive variables - $:LOOP() + $:GPU_LOOP() do i = 1, 3 qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) end do qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - $:LOOP() + $:GPU_LOOP() do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -985,22 +985,22 @@ contains if (chemistry) then rho_K = 0._wp - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) end do - $:LOOP() + $:GPU_LOOP() do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = rho_K end do - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) end do else - $:LOOP() + $:GPU_LOOP() do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1010,7 +1010,7 @@ contains rho_K = max(rho_K, sgm_eps) #endif - $:LOOP() + $:GPU_LOOP() do i = momxb, momxe if (model_eqns /= 4) then qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & @@ -1024,7 +1024,7 @@ contains end do if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_species rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do @@ -1054,7 +1054,7 @@ contains end if if (bubbles_euler) then - $:LOOP() + $:GPU_LOOP() do i = 1, nb nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) end do @@ -1066,7 +1066,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) !Convert cons to prim - $:LOOP() + $:GPU_LOOP() do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1083,7 +1083,7 @@ contains call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - $:LOOP() + $:GPU_LOOP() do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1091,21 +1091,21 @@ contains end if if (mhd) then - $:LOOP() + $:GPU_LOOP() do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do end if if (elasticity) then - $:LOOP() + $:GPU_LOOP() do i = strxb, strxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = strxb, strxe ! subtracting elastic contribution for pressure calculation if (G_K > verysmall) then @@ -1122,13 +1122,13 @@ contains end if if (hyperelasticity) then - $:LOOP() + $:GPU_LOOP() do i = xibeg, xiend qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1462,33 +1462,33 @@ contains is2b = is2%beg; is2e = is2%end is3b = is3%beg; is3e = is3%end - $:UPDATE(device=["is1b","is2b","is3b","is1e","is2e","is3e"]) + $:GPU_UPDATE(device=["is1b","is2b","is3b","is1e","is2e","is3e"]) ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_K", "vel_K", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_K", "vel_K", & "alpha_K", "Re_K", "Y_K"]) do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e - $:LOOP() + $:GPU_LOOP() do i = 1, contxe alpha_rho_K(i) = qK_prim_vf(j, k, l, i) end do - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_K(i) = qK_prim_vf(j, k, l, contxe + i) end do vel_K_sum = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_K_sum = vel_K_sum + vel_K(i)**2._wp end do @@ -1509,7 +1509,7 @@ contains ! Computing the energy from the pressure if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do @@ -1526,12 +1526,12 @@ contains end if ! mass flux, this should be \alpha_i \rho_i u_i - $:LOOP() + $:GPU_LOOP() do i = 1, contxe FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels FK_vf(j, k, l, contxe + dir_idx(i)) = & rho_K*vel_K(dir_idx(1)) & @@ -1544,14 +1544,14 @@ contains ! Species advection Flux, \rho*u*Y if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_species FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do end if if (riemann_solver == 1 .or. riemann_solver == 4) then - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe FK_vf(j, k, l, i) = 0._wp FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) @@ -1559,12 +1559,12 @@ contains else ! Could be bubbles_euler! - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) end do - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) end do @@ -1604,7 +1604,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_speed_of_sound #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: pres @@ -1638,7 +1638,7 @@ contains c = (1._wp/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2))) elseif (model_eqns == 3) then c = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids c = c + adv(q)*(1._wp/gammas(q) + 1._wp)* & (pres + pi_infs(q)/(gammas(q) + 1._wp)) @@ -1674,7 +1674,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: B(3), rho, c diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index 1adfd6ab1f..0f77df808d 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -104,7 +104,7 @@ contains !! @param patch_id_fp Array to track patch ids pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:ROUTINE() + $:GPU_ROUTINE() integer, intent(in) :: patch_id integer, intent(in) :: j, k, l @@ -277,7 +277,7 @@ contains !! @param patch_id_fp Array to track patch ids impure subroutine s_assign_patch_species_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:ROUTINE() + $:GPU_ROUTINE() integer, intent(in) :: patch_id integer, intent(in) :: j, k, l diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index fe861c4c06..2dfefe6623 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2382,7 +2382,7 @@ contains end subroutine s_model subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: cyl_y, cyl_z @@ -2393,7 +2393,7 @@ contains pure function f_convert_cyl_to_cart(cyl) result(cart) - $:ROUTINE() + $:GPU_ROUTINE() t_vec3, intent(in) :: cyl t_vec3 :: cart @@ -2405,7 +2405,7 @@ contains end function f_convert_cyl_to_cart subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(IN) :: cyl_x, cyl_y @@ -2418,7 +2418,7 @@ contains !! @param offset Thickness !! @param a Starting position pure elemental function f_r(myth, offset, a) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: myth, offset, a real(wp) :: b real(wp) :: f_r diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 81e311141a..b0ed99390b 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -1,7 +1,7 @@ #:def arithmetic_avg() rho_avg = 5e-1_wp*(rho_L + rho_R) vel_avg_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -17,7 +17,7 @@ vel_avg_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_avg_rms = vel_avg_rms + (sqrt(rho_L)*vel_L(i) + sqrt(rho_R)*vel_R(i))**2._wp/ & (sqrt(rho_L) + sqrt(rho_R))**2._wp diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index d7186d75a3..3e1dea3e5e 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -24,43 +24,43 @@ module m_acoustic_src private; public :: s_initialize_acoustic_src, s_precalculate_acoustic_spatial_sources, s_acoustic_src_calculations integer, allocatable, dimension(:) :: pulse, support - $:DECLARE(create=["pulse","support"]) + $:GPU_DECLARE(create=["pulse","support"]) logical, allocatable, dimension(:) :: dipole - $:DECLARE(create=["dipole"]) + $:GPU_DECLARE(create=["dipole"]) real(wp), allocatable, target, dimension(:, :) :: loc_acoustic - $:DECLARE(create=["loc_acoustic"]) + $:GPU_DECLARE(create=["loc_acoustic"]) real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency real(wp), allocatable, dimension(:) :: gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay - $:DECLARE(create=["mag","length","height","wavelength","frequency"]) - $:DECLARE(create=["gauss_sigma_dist","gauss_sigma_time","npulse","dir","delay"]) + $:GPU_DECLARE(create=["mag","length","height","wavelength","frequency"]) + $:GPU_DECLARE(create=["gauss_sigma_dist","gauss_sigma_time","npulse","dir","delay"]) real(wp), allocatable, dimension(:) :: foc_length, aperture - $:DECLARE(create=["foc_length","aperture"]) + $:GPU_DECLARE(create=["foc_length","aperture"]) real(wp), allocatable, dimension(:) :: element_spacing_angle, element_polygon_ratio, rotate_angle - $:DECLARE(create=["element_spacing_angle","element_polygon_ratio","rotate_angle"]) + $:GPU_DECLARE(create=["element_spacing_angle","element_polygon_ratio","rotate_angle"]) real(wp), allocatable, dimension(:) :: bb_bandwidth, bb_lowest_freq - $:DECLARE(create=["bb_bandwidth","bb_lowest_freq"]) + $:GPU_DECLARE(create=["bb_bandwidth","bb_lowest_freq"]) integer, allocatable, dimension(:) :: num_elements, element_on, bb_num_freq - $:DECLARE(create=["num_elements","element_on","bb_num_freq"]) + $:GPU_DECLARE(create=["num_elements","element_on","bb_num_freq"]) !> @name Acoustic source terms !> @{ real(wp), allocatable, dimension(:, :, :) :: mass_src, e_src real(wp), allocatable, dimension(:, :, :, :) :: mom_src !> @} - $:DECLARE(create=["mass_src","e_src","mom_src"]) + $:GPU_DECLARE(create=["mass_src","e_src","mom_src"]) integer, dimension(:), allocatable :: source_spatials_num_points !< Number of non-zero source grid points for each source - $:DECLARE(create=["source_spatials_num_points"]) + $:GPU_DECLARE(create=["source_spatials_num_points"]) type(source_spatial_type), dimension(:), allocatable :: source_spatials !< Data of non-zero source grid points for each source - $:DECLARE(create=["source_spatials"]) + $:GPU_DECLARE(create=["source_spatials"]) contains @@ -111,7 +111,7 @@ contains delay(i) = acoustic(i)%delay end if end do - $:UPDATE(device=["loc_acoustic","mag","dipole","support","length", & + $:GPU_UPDATE(device=["loc_acoustic","mag","dipole","support","length", & & "height","wavelength","frequency","gauss_sigma_dist", & & "gauss_sigma_time","foc_length","aperture","npulse","pulse", & & "dir","delay","element_polygon_ratio","rotate_angle", & @@ -167,7 +167,7 @@ contains sim_time = t_step*dt - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -220,7 +220,7 @@ contains deallocate (phi_rn) - $:PARALLEL_LOOP(private=["myalpha","myalpha_rho"]) + $:GPU_PARALLEL_LOOP(private=["myalpha","myalpha_rho"]) do i = 1, num_points j = source_spatials(ai)%coord(1, i) k = source_spatials(ai)%coord(2, i) @@ -239,7 +239,7 @@ contains if (bubbles_euler) then if (num_fluids > 2) then - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids - 1 myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -253,7 +253,7 @@ contains end if if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -320,15 +320,15 @@ contains end do ! Update the rhs variables - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do q = contxb, contxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) end do - $:LOOP() + $:GPU_LOOP() do q = momxb, momxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) end do @@ -347,7 +347,7 @@ contains !! @param gauss_sigma_time_local sigma in time for Gaussian pulse !! @param source Source term amplitude pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) - $:ROUTINE() + $:GPU_ROUTINE() integer, intent(in) :: ai, term_index real(wp), intent(in) :: sim_time, c, sum_BB real(wp), intent(in) :: frequency_local, gauss_sigma_time_local @@ -474,14 +474,14 @@ contains call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials') end if - $:UPDATE(device=["source_spatials(ai)%coord"]) - $:UPDATE(device=["source_spatials(ai)%val"]) + $:GPU_UPDATE(device=["source_spatials(ai)%coord"]) + $:GPU_UPDATE(device=["source_spatials(ai)%val"]) if (support(ai) >= 5) then if (dim == 2) then - $:UPDATE(device=["source_spatials(ai)%angle"]) + $:GPU_UPDATE(device=["source_spatials(ai)%angle"]) end if if (dim == 3) then - $:UPDATE(device=["source_spatials(ai)%xyz_to_r_ratios"]) + $:GPU_UPDATE(device=["source_spatials(ai)%xyz_to_r_ratios"]) end if end if @@ -699,7 +699,7 @@ contains !! @param c Speed of sound !! @return frequency_local Converted frequency pure elemental function f_frequency_local(freq_conv_flag, ai, c) - $:ROUTINE() + $:GPU_ROUTINE() logical, intent(in) :: freq_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c @@ -718,7 +718,7 @@ contains !! @param ai Acoustic source index !! @return gauss_sigma_time_local Converted Gaussian sigma time pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - $:ROUTINE() + $:GPU_ROUTINE() logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index b0d7cda444..d761141a0d 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -23,7 +23,7 @@ module m_body_forces s_finalize_body_forces_module real(wp), allocatable, dimension(:, :, :) :: rhoM - $:DECLARE(create=["rhoM"]) + $:GPU_DECLARE(create=["rhoM"]) contains @@ -68,7 +68,7 @@ contains end if end if - $:UPDATE(device=["accel_bf"]) + $:GPU_UPDATE(device=["accel_bf"]) end subroutine s_compute_acceleration @@ -80,7 +80,7 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -110,7 +110,7 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -123,7 +123,7 @@ contains if (bf_x) then ! x-direction body forces - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -138,7 +138,7 @@ contains if (bf_y) then ! y-direction body forces - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -153,7 +153,7 @@ contains if (bf_z) then ! z-direction body forces - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index ad50b559ac..f7408c43de 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -40,7 +40,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_type(dir, loc)%sf - $:UPDATE(device=["bc_type(dir, loc)%sf"]) + $:GPU_UPDATE(device=["bc_type(dir, loc)%sf"]) end do end do close (1) @@ -56,7 +56,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_buffers(dir, loc)%sf - $:UPDATE(device=["bc_buffers(dir, loc)%sf"]) + $:GPU_UPDATE(device=["bc_buffers(dir, loc)%sf"]) end do end do close (1) @@ -104,7 +104,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_type(dir, loc)%sf) - $:UPDATE(device=["bc_type(dir, loc)%sf"]) + $:GPU_UPDATE(device=["bc_type(dir, loc)%sf"]) end do end do @@ -114,7 +114,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_buffers(dir, loc)%sf) - $:UPDATE(device=["bc_buffers(dir, loc)%sf"]) + $:GPU_UPDATE(device=["bc_buffers(dir, loc)%sf"]) end do end do diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 54fc7fcc47..3d9341ef0a 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -21,7 +21,7 @@ module m_bubbles real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) real(wp) :: k_mw !< Bubble wall properties (Ando 2010) real(wp) :: rho_mw !< Bubble wall properties (Ando 2010) - $:DECLARE(create=["chi_vw","k_mw","rho_mw"]) + $:GPU_DECLARE(create=["chi_vw","k_mw","rho_mw"]) contains @@ -40,7 +40,7 @@ contains !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu real(wp), intent(in) :: fCson @@ -70,7 +70,7 @@ contains else if (bubble_model == 3) then ! Rayleigh-Plesset bubbles fCpbw = f_cpbw_KM(fR0, fR, fV, fpb) - f_rddot = f_rddot_RP(fP, fRho, fR, fV, fCpbw) + f_rddot = f_rddot_RP(fP, fRho, fR, fV, fR0, fCpbw) end if end function f_rddot @@ -81,7 +81,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw(fR0, fR, fV, fpb) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw @@ -100,7 +100,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait real(wp) :: tmp1, tmp2, tmp3 @@ -120,7 +120,7 @@ contains !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy pure elemental function f_cgas(fCpinf, fntait, fBtait, fH) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fCpinf, fntait, fBtait, fH real(wp) :: tmp @@ -143,7 +143,7 @@ contains !! @param advsrc Advection equation source term !! @param divu Divergence of velocity pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu real(wp) :: c2_liquid @@ -173,7 +173,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -209,7 +209,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fCpbw Boundary wall pressure pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fR0, fCpbw) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fCp, fRho, fR, fV, fR0, fCpbw real(wp) :: f_rddot_RP @@ -232,7 +232,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -255,7 +255,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw_KM(fR0, fR, fV, fpb) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -282,7 +282,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -316,7 +316,7 @@ contains !! @param pb Internal bubble pressure !! @param iR0 Current bubble size index pure elemental subroutine s_bwproperty(pb, iR0, chi_vw, k_mw, rho_mw) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: pb integer, intent(in) :: iR0 real(wp), intent(out) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -346,7 +346,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fR real(wp), intent(in) :: fV real(wp), intent(in) :: fpb @@ -404,7 +404,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -467,7 +467,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_step #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf @@ -601,7 +601,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_initial_substep_h #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -687,7 +687,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_substep #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(OUT) :: err real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf @@ -785,7 +785,7 @@ contains !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t integer, intent(IN) :: bub_id diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index d6c2b0212c..c522af5e27 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -22,13 +22,13 @@ module m_bubbles_EE real(wp), allocatable, dimension(:, :, :) :: bub_adv_src real(wp), allocatable, dimension(:, :, :, :) :: bub_r_src, bub_v_src, bub_p_src, bub_m_src - $:DECLARE(create=["bub_adv_src","bub_r_src","bub_v_src","bub_p_src","bub_m_src"]) + $:GPU_DECLARE(create=["bub_adv_src","bub_r_src","bub_v_src","bub_p_src","bub_m_src"]) type(scalar_field) :: divu !< matrix for div(u) - $:DECLARE(create=["divu"]) + $:GPU_DECLARE(create=["divu"]) integer, allocatable, dimension(:) :: rs, vs, ms, ps - $:DECLARE(create=["rs","vs","ms","ps"]) + $:GPU_DECLARE(create=["rs","vs","ms","ps"]) contains @@ -52,9 +52,9 @@ contains end if end do - $:UPDATE(device=["rs", "vs"]) + $:GPU_UPDATE(device=["rs", "vs"]) if (.not. polytropic) then - $:UPDATE(device=["ps", "ms"]) + $:GPU_UPDATE(device=["ps", "ms"]) end if @:ALLOCATE(divu%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) @@ -77,12 +77,12 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m nR3bar = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, nb nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp end do @@ -104,7 +104,7 @@ contains if (idir == 1) then if (.not. qbmm) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -120,7 +120,7 @@ contains elseif (idir == 2) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -134,7 +134,7 @@ contains elseif (idir == 3) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -173,13 +173,13 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m bub_adv_src(j, k, l) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, nb bub_r_src(j, k, l, q) = 0._wp bub_v_src(j, k, l, q) = 0._wp @@ -191,7 +191,7 @@ contains end do adap_dt_stop_max = 0 - $:PARALLEL_LOOP(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & + $:GPU_PARALLEL_LOOP(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & reduction=["adap_dt_stop_max"], reductionOp="MAX", & copy=["adap_dt_stop_max"]) do l = 0, p @@ -201,7 +201,7 @@ contains if (adv_n) then nbub = q_prim_vf(n_idx)%sf(j, k, l) else - $:LOOP() + $:GPU_LOOP() do q = 1, nb Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) @@ -209,7 +209,7 @@ contains R3 = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, nb R3 = R3 + weight(q)*Rtmp(q)**3._wp end do @@ -220,7 +220,7 @@ contains if (.not. adap_dt) then R2Vav = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, nb R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) end do @@ -228,10 +228,10 @@ contains bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav end if - $:LOOP() + $:GPU_LOOP() do q = 1, nb - $:LOOP() + $:GPU_LOOP() do ii = 1, num_fluids myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) @@ -242,14 +242,14 @@ contains B_tait = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do ii = 1, num_fluids myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) B_tait = B_tait + myalpha(ii)*pi_infs(ii) end do else if (num_fluids > 2) then - $:LOOP() + $:GPU_LOOP() do ii = 1, num_fluids - 1 myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) @@ -325,14 +325,14 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - $:LOOP() + $:GPU_LOOP() do k = 1, nb rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index c455426180..3240a9d9a8 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -33,26 +33,26 @@ module m_bubbles_EL real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius - $:DECLARE(create=["lag_id", "bub_R0", "Rmax_stats", "Rmin_stats"]) + $:GPU_DECLARE(create=["lag_id", "bub_R0", "Rmax_stats", "Rmin_stats"]) real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) - $:DECLARE(create=["gas_mg", "gas_betaT", "gas_betaC", "bub_dphidt"]) + $:GPU_DECLARE(create=["gas_mg", "gas_betaT", "gas_betaC", "bub_dphidt"]) !(nBub, 1 -> actual val or 2 -> temp val) real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface - $:DECLARE(create=["gas_p", "gas_mv", "intfc_rad", "intfc_vel"]) + $:GPU_DECLARE(create=["gas_p", "gas_mv", "intfc_rad", "intfc_vel"]) !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format - $:DECLARE(create=["mtn_pos", "mtn_posPrev", "mtn_vel", "mtn_s"]) + $:GPU_DECLARE(create=["mtn_pos", "mtn_posPrev", "mtn_vel", "mtn_s"]) !(nBub, 1-> x or 2->y or 3 ->z, time-stage) real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity @@ -60,18 +60,18 @@ module m_bubbles_EL real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity - $:DECLARE(create=["intfc_draddt", "intfc_dveldt", "gas_dpdt", "gas_dmvdt", "mtn_dposdt", "mtn_dveldt"]) + $:GPU_DECLARE(create=["intfc_draddt", "intfc_dveldt", "gas_dpdt", "gas_dmvdt", "mtn_dposdt", "mtn_dveldt"]) integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme - $:DECLARE(create=["lag_num_ts"]) + $:GPU_DECLARE(create=["lag_num_ts"]) integer :: nBubs !< Number of bubbles in the local domain real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework integer :: q_beta_idx !< Size of the q_beta vector field - $:DECLARE(create=["nBubs","Rmax_glb","Rmin_glb","q_beta","q_beta_idx"]) + $:GPU_DECLARE(create=["nBubs","Rmax_glb","Rmin_glb","q_beta","q_beta_idx"]) contains @@ -101,7 +101,7 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if - $:UPDATE(device=["lag_num_ts", "q_beta_idx"]) + $:GPU_UPDATE(device=["lag_num_ts", "q_beta_idx"]) @:ALLOCATE(q_beta%vf(1:q_beta_idx)) @@ -249,9 +249,9 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - $:UPDATE(device=["bubbles_lagrange", "lag_params"]) + $:GPU_UPDATE(device=["bubbles_lagrange", "lag_params"]) - $:UPDATE(device=["lag_id","bub_R0","Rmax_stats","Rmin_stats","gas_mg", & + $:GPU_UPDATE(device=["lag_id","bub_R0","Rmax_stats","Rmin_stats","gas_mg", & & "gas_betaT","gas_betaC","bub_dphidt","gas_p","gas_mv", & & "intfc_rad","intfc_vel","mtn_pos","mtn_posPrev","mtn_vel", & & "mtn_s","intfc_draddt","intfc_dveldt","gas_dpdt","gas_dmvdt", & @@ -259,9 +259,9 @@ contains Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) - $:UPDATE(device=["Rmax_glb", "Rmin_glb"]) + $:GPU_UPDATE(device=["Rmax_glb", "Rmin_glb"]) - $:UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) + $:GPU_UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) !Populate temporal variables call s_transfer_data_to_tmp() @@ -528,7 +528,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - $:PARALLEL_LOOP(private=["k","cell"]) + $:GPU_PARALLEL_LOOP(private=["k","cell"]) do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -549,7 +549,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:PARALLEL_LOOP(private=["k","myalpha_rho","myalpha","Re","cell"], & + $:GPU_PARALLEL_LOOP(private=["k","myalpha_rho","myalpha","Re","cell"], & & reduction=["adap_dt_stop_max"],reductionOp="MAX", & & copy=["adap_dt_stop_max"],copyin=["stage"]) do k = 1, nBubs @@ -574,7 +574,7 @@ contains call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) ! Obtain liquid density and computing speed of sound from pinf - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) @@ -619,7 +619,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - $:PARALLEL_LOOP(collapse=2, private=["k"], copyin=["stage"]) + $:GPU_PARALLEL_LOOP(collapse=2, private=["k"], copyin=["stage"]) do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp @@ -649,7 +649,7 @@ contains if (lag_params%solver_approach == 2) then if (p == 0) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -665,7 +665,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -685,7 +685,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -700,7 +700,7 @@ contains end do !source in energy - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(2)%beg, idwbuff(2)%end do i = idwbuff(1)%beg, idwbuff(1)%end @@ -711,7 +711,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -742,7 +742,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_cson_from_pinf #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(in) :: pinf, rhol, gamma, pi_inf @@ -753,7 +753,7 @@ contains real(wp), dimension(num_dims) :: vel integer :: i - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) end do @@ -770,7 +770,7 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -785,7 +785,7 @@ contains mtn_s, mtn_pos, q_beta) !Store 1-beta - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -812,7 +812,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, intent(in) :: bub_id, ptype type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf @@ -834,7 +834,7 @@ contains !< Find current bubble cell cell(:) = int(scoord(:)) - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 end do @@ -925,11 +925,11 @@ contains charpres2 = 0._wp vol = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, smearGrid - $:LOOP() + $:GPU_LOOP() do j = 1, smearGrid - $:LOOP() + $:GPU_LOOP() do k = 1, smearGridz cellaux(1) = cell(1) + i - (mapCells + 1) cellaux(2) = cell(2) + j - (mapCells + 1) @@ -1028,7 +1028,7 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1044,13 +1044,13 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1062,7 +1062,7 @@ contains end do elseif (stage == 2) then - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp @@ -1078,7 +1078,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if @@ -1086,7 +1086,7 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1098,7 +1098,7 @@ contains end do elseif (stage == 2) then - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp @@ -1109,7 +1109,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do elseif (stage == 3) then - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) @@ -1125,7 +1125,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if @@ -1195,7 +1195,7 @@ contains integer :: k - $:PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private=["k"]) do k = 1, nBubs gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) @@ -1294,7 +1294,7 @@ contains if (dir == 1) then ! Gradient in x dir. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1309,7 +1309,7 @@ contains else if (dir == 2) then ! Gradient in y dir. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1323,7 +1323,7 @@ contains end do else ! Gradient in z dir. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1417,7 +1417,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:PARALLEL_LOOP(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & + $:GPU_PARALLEL_LOOP(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & ["lag_void_max"]], reductionOp=["+", "MAX"], & copy=["lag_vol", "lag_void_avg", "lag_void_max"]) do k = 0, p @@ -1603,7 +1603,7 @@ contains integer :: k - $:PARALLEL_LOOP(reduction=[["Rmax_glb"], ["Rmin_glb"]], & + $:GPU_PARALLEL_LOOP(reduction=[["Rmax_glb"], ["Rmin_glb"]], & & reductionOp=["MAX", "MIN"], copy=["Rmax_glb","Rmin_glb"]) do k = 1, nBubs Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) @@ -1623,7 +1623,7 @@ contains write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - $:UPDATE(host=["Rmax_glb","Rmin_glb"]) + $:GPU_UPDATE(host=["Rmax_glb","Rmin_glb"]) open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' @@ -1651,7 +1651,7 @@ contains integer :: i - $:LOOP() + $:GPU_LOOP() do i = bub_id, nBubs - 1 lag_id(i, 1) = lag_id(i + 1, 1) bub_R0(i) = bub_R0(i + 1) @@ -1676,7 +1676,7 @@ contains end do nBubs = nBubs - 1 - $:UPDATE(device=["nBubs"]) + $:GPU_UPDATE(device=["nBubs"]) end subroutine s_remove_lag_bubble diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index ae00f42a25..aa660b2ed0 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,7 +56,7 @@ contains real(wp), dimension(3) :: s_coord integer :: l - $:PARALLEL_LOOP(private=["l","s_coord","cell"]) + $:GPU_PARALLEL_LOOP(private=["l","s_coord","cell"]) do l = 1, nBubs volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp @@ -121,7 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - $:PARALLEL_LOOP(private=["nodecoord","l","s_coord","cell","center"], copyin=["smearGrid","smearGridz"]) + $:GPU_PARALLEL_LOOP(private=["nodecoord","l","s_coord","cell","center"], copyin=["smearGrid","smearGridz"]) do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp @@ -204,7 +204,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_applygaussian #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: center integer, dimension(3), intent(in) :: cellaux @@ -274,7 +274,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_check_celloutside #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, dimension(3), intent(inout) :: cellaux logical, intent(out) :: celloutside @@ -310,7 +310,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, dimension(3), intent(inout) :: cellaux integer, dimension(3), intent(in) :: cell @@ -351,7 +351,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_stddsv #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, dimension(3), intent(in) :: cell real(wp), intent(in) :: volpart @@ -392,7 +392,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_char_vol #else - $:ROUTINE() + $:GPU_ROUTINE() #endif integer, intent(in) :: cellx, celly, cellz real(wp), intent(out) :: Charvol @@ -417,7 +417,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: s_cell integer, dimension(3), intent(out) :: get_cell diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 6b19251034..3c587d9735 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -49,7 +49,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf - $:DECLARE(create=["q_prim_rsx_vf","q_prim_rsy_vf","q_prim_rsz_vf"]) + $:GPU_DECLARE(create=["q_prim_rsx_vf","q_prim_rsy_vf","q_prim_rsz_vf"]) type(scalar_field), allocatable, dimension(:) :: F_rs_vf, F_src_rs_vf !< @@ -59,7 +59,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: F_rsx_vf, F_src_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsy_vf, F_src_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsz_vf, F_src_rsz_vf !< - $:DECLARE(create=["F_rsx_vf","F_src_rsx_vf","F_rsy_vf","F_src_rsy_vf","F_rsz_vf","F_src_rsz_vf"]) + $:GPU_DECLARE(create=["F_rsx_vf","F_src_rsx_vf","F_rsy_vf","F_src_rsy_vf","F_rsz_vf","F_src_rsz_vf"]) !! There is a CCE bug that is causing some subset of these variables to interfere !! with variables of the same name in m_riemann_solvers.fpp, and giving this versions @@ -70,14 +70,14 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf_l, flux_src_rsx_vf_l !< real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf_l, flux_src_rsy_vf_l real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf_l, flux_src_rsz_vf_l - $:DECLARE(create=["flux_rsx_vf_l","flux_src_rsx_vf_l","flux_rsy_vf_l","flux_src_rsy_vf_l","flux_rsz_vf_l","flux_src_rsz_vf_l"]) + $:GPU_DECLARE(create=["flux_rsx_vf_l","flux_src_rsx_vf_l","flux_rsy_vf_l","flux_src_rsy_vf_l","flux_rsz_vf_l","flux_src_rsz_vf_l"]) real(wp) :: c !< Cell averaged speed of sound real(wp), dimension(2) :: Re !< Cell averaged Reynolds numbers - $:DECLARE(create=["c","Re"]) + $:GPU_DECLARE(create=["c","Re"]) real(wp) :: dpres_ds !< Spatial derivatives in s-dir of pressure - $:DECLARE(create=["dpres_ds"]) + $:GPU_DECLARE(create=["dpres_ds"]) real(wp), allocatable, dimension(:) :: ds !< Cell-width distribution in the s-direction @@ -97,21 +97,21 @@ module m_cbc real(wp), allocatable, dimension(:, :, :) :: pi_coef_y !< Polynomial interpolant coefficients in y-dir real(wp), allocatable, dimension(:, :, :) :: pi_coef_z !< Polynomial interpolant coefficients in z-dir - $:DECLARE(create=["ds","fd_coef_x","fd_coef_y","fd_coef_z","pi_coef_x","pi_coef_y","pi_coef_z"]) + $:GPU_DECLARE(create=["ds","fd_coef_x","fd_coef_y","fd_coef_z","pi_coef_x","pi_coef_y","pi_coef_z"]) !! The first dimension of the array identifies the polynomial, the !! second dimension identifies the position of its coefficients and the last !! dimension denotes the location of the CBC. type(int_bounds_info) :: is1, is2, is3 !< Indical bounds in the s1-, s2- and s3-directions - $:DECLARE(create=["is1","is2","is3"]) + $:GPU_DECLARE(create=["is1","is2","is3"]) integer :: dj integer :: bcxb, bcxe, bcyb, bcye, bczb, bcze integer :: cbc_dir, cbc_loc integer :: flux_cbc_index - $:DECLARE(create=["dj","bcxb","bcxe","bcyb","bcye","bczb","bcze"]) - $:DECLARE(create=["cbc_dir", "cbc_loc","flux_cbc_index"]) + $:GPU_DECLARE(create=["dj","bcxb","bcxe","bcyb","bcye","bczb","bcze"]) + $:GPU_DECLARE(create=["cbc_dir", "cbc_loc","flux_cbc_index"]) !! GRCBC inputs for subsonic inflow and outflow conditions consisting of !! inflow velocities, pressure, density and void fraction as well as @@ -120,9 +120,9 @@ module m_cbc real(wp), allocatable, dimension(:) :: pres_in, pres_out, Del_in, Del_out real(wp), allocatable, dimension(:, :) :: vel_in, vel_out real(wp), allocatable, dimension(:, :) :: alpha_rho_in, alpha_in - $:DECLARE(create=["pres_in","pres_out","Del_in","Del_out"]) - $:DECLARE(create=["vel_in","vel_out"]) - $:DECLARE(create=["alpha_rho_in","alpha_in"]) + $:GPU_DECLARE(create=["pres_in","pres_out","Del_in","Del_out"]) + $:GPU_DECLARE(create=["vel_in","vel_out"]) + $:GPU_DECLARE(create=["alpha_rho_in","alpha_in"]) contains @@ -139,7 +139,7 @@ contains else flux_cbc_index = adv_idx%end end if - $:UPDATE(device=["flux_cbc_index"]) + $:GPU_UPDATE(device=["flux_cbc_index"]) call s_any_cbc_boundaries(is_cbc) @@ -389,7 +389,7 @@ contains end if - $:UPDATE(device=["fd_coef_x","fd_coef_y","fd_coef_z", & + $:GPU_UPDATE(device=["fd_coef_x","fd_coef_y","fd_coef_z", & & "pi_coef_x","pi_coef_y","pi_coef_z"]) ! Associating the procedural pointer to the appropriate subroutine @@ -398,20 +398,20 @@ contains bcxb = bc_x%beg bcxe = bc_x%end - $:UPDATE(device=["bcxb", "bcxe"]) + $:GPU_UPDATE(device=["bcxb", "bcxe"]) if (n > 0) then bcyb = bc_y%beg bcye = bc_y%end - $:UPDATE(device=["bcyb", "bcye"]) + $:GPU_UPDATE(device=["bcyb", "bcye"]) end if if (p > 0) then bczb = bc_z%beg bcze = bc_z%end - $:UPDATE(device=["bczb", "bcze"]) + $:GPU_UPDATE(device=["bczb", "bcze"]) end if ! Allocate GRCBC inputs @@ -443,7 +443,7 @@ contains end do end if #:endfor - $:UPDATE(device=["vel_in","vel_out","pres_in","pres_out", & + $:GPU_UPDATE(device=["vel_in","vel_out","pres_in","pres_out", & & "Del_in","Del_out","alpha_rho_in","alpha_in"]) end subroutine s_initialize_cbc_module @@ -608,7 +608,7 @@ contains end if - $:UPDATE(device=["ds"]) + $:GPU_UPDATE(device=["ds"]) end subroutine s_associate_cbc_coefficients_pointers @@ -684,7 +684,7 @@ contains cbc_dir = cbc_dir_norm cbc_loc = cbc_loc_norm - $:UPDATE(device=["cbc_dir", "cbc_loc"]) + $:GPU_UPDATE(device=["cbc_dir", "cbc_loc"]) call s_initialize_cbc(q_prim_vf, flux_vf, flux_src_vf, & ix, iy, iz) @@ -702,7 +702,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -714,7 +714,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -733,7 +733,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do j = 0, 1 do r = is3%beg, is3%end @@ -753,7 +753,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do j = 0, 1 do r = is3%beg, is3%end @@ -776,7 +776,7 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - $:PARALLEL_LOOP(collapse=2, private=["alpha_rho", "vel", "adv", & + $:GPU_PARALLEL_LOOP(collapse=2, private=["alpha_rho", "vel", "adv", & "mf", "dvel_ds", "dadv_ds", "Re_cbc", "dalpha_rho_ds","dvel_dt", & "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & "dYs_ds", "h_k", "Cp_i", "Gamma_i", "Xs"]) @@ -784,25 +784,25 @@ contains do k = is2%beg, is2%end ! Transferring the Primitive Variables - $:LOOP() + $:GPU_LOOP() do i = 1, contxe alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) end do vel_K_sum = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_K_sum = vel_K_sum + vel(i)**2._wp end do pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) - $:LOOP() + $:GPU_LOOP() do i = 1, advxe - E_idx adv(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) end do @@ -813,13 +813,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc) end if - $:LOOP() + $:GPU_LOOP() do i = 1, contxe mf(i) = alpha_rho(i)/rho end do if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do @@ -852,39 +852,39 @@ contains ! First-Order Spatial Derivatives of Primitive Variables - $:LOOP() + $:GPU_LOOP() do i = 1, contxe dalpha_rho_ds(i) = 0._wp end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims dvel_ds(i) = 0._wp end do dpres_ds = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, advxe - E_idx dadv_ds(i) = 0._wp end do if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_species dYs_ds(i) = 0._wp end do end if - $:LOOP() + $:GPU_LOOP() do j = 0, buff_size - $:LOOP() + $:GPU_LOOP() do i = 1, contxe dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dalpha_rho_ds(i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -894,7 +894,7 @@ contains dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dpres_ds - $:LOOP() + $:GPU_LOOP() do i = 1, advxe - E_idx dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -902,7 +902,7 @@ contains end do if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_species dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -929,7 +929,7 @@ contains call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) ! Add GRCBC for Subsonic Inflow if (bc_${XYZ}$%grcbc_in) then - $:LOOP() + $:GPU_LOOP() do i = 2, momxb L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -939,7 +939,7 @@ contains L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if end if - $:LOOP() + $:GPU_LOOP() do i = E_idx, advxe - 1 L(i) = c*Ma*(adv(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -979,13 +979,13 @@ contains dpres_dt = -5e-1_wp*(L(advxe) + L(1)) end if - $:LOOP() + $:GPU_LOOP() do i = 1, contxe dalpha_rho_dt(i) = & -(L(i + 1) - mf(i)*dpres_dt)/(c*c) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & (L(1) - L(advxe))/(2._wp*rho*c) + & @@ -994,13 +994,13 @@ contains end do vel_dv_dt_sum = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) end do if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_species dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do @@ -1008,12 +1008,12 @@ contains ! The treatment of void fraction source is unclear if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - $:LOOP() + $:GPU_LOOP() do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) !+ adv(i) * vel(dir_idx(1))/y_cc(n) end do else - $:LOOP() + $:GPU_LOOP() do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) end do @@ -1026,7 +1026,7 @@ contains dgamma_dt = dadv_dt(1) dpi_inf_dt = dadv_dt(2) else - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids drho_dt = drho_dt + dalpha_rho_dt(i) dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) @@ -1036,13 +1036,13 @@ contains end if ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*dalpha_rho_dt(i) end do - $:LOOP() + $:GPU_LOOP() do i = momxb, momxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*(vel(i - contxe)*drho_dt & @@ -1053,14 +1053,14 @@ contains ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 call get_species_enthalpies_rt(T, h_k) sum_Enthalpies = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_species h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) end do flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - $:LOOP() + $:GPU_LOOP() do i = 1, num_species flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) @@ -1076,12 +1076,12 @@ contains end if if (riemann_solver == 1) then - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & @@ -1094,13 +1094,13 @@ contains else - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & ds(0)*dadv_dt(i - E_idx) end do - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) end do @@ -1164,13 +1164,13 @@ contains end if dj = max(0, cbc_loc) - $:UPDATE(device=["is1","is2","is3","dj"]) - $:UPDATE(device=["dir_idx","dir_flg"]) + $:GPU_UPDATE(device=["is1","is2","is3","dj"]) + $:GPU_UPDATE(device=["dir_idx","dir_flg"]) ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1182,7 +1182,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1193,7 +1193,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1206,7 +1206,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1217,7 +1217,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1229,7 +1229,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1246,7 +1246,7 @@ contains ! Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1258,7 +1258,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1269,7 +1269,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1282,7 +1282,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1293,7 +1293,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1305,7 +1305,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1322,7 +1322,7 @@ contains ! Reshaping Inputted Data in z-direction else - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1334,7 +1334,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1345,7 +1345,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1358,7 +1358,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1369,7 +1369,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1381,7 +1381,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1415,12 +1415,12 @@ contains ! Determining the indicial shift based on CBC location dj = max(0, cbc_loc) - $:UPDATE(device=["dj"]) + $:GPU_UPDATE(device=["dj"]) ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1432,7 +1432,7 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1443,7 +1443,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1455,7 +1455,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1471,7 +1471,7 @@ contains ! Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1484,7 +1484,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1495,7 +1495,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1507,7 +1507,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1524,7 +1524,7 @@ contains ! Reshaping Outputted Data in z-direction else - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1537,7 +1537,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1548,7 +1548,7 @@ contains end do if (riemann_solver == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1560,7 +1560,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 5880361461..d397d19cf5 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -87,7 +87,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -105,7 +105,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -135,7 +135,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -152,7 +152,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -176,7 +176,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -198,7 +198,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -220,7 +220,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(sys_size), intent(inout) :: L L(1:advxe) = 0._wp @@ -232,7 +232,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 7407375f86..b1922c08fa 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -55,14 +55,14 @@ module m_data_output real(wp), allocatable, dimension(:, :, :) :: ccfl_sf !< CCFL stability criterion real(wp), allocatable, dimension(:, :, :) :: Rc_sf !< Rc stability criterion real(wp), public, allocatable, dimension(:, :) :: c_mass - $:DECLARE(create=["icfl_sf","vcfl_sf","ccfl_sf","Rc_sf"]) + $:GPU_DECLARE(create=["icfl_sf","vcfl_sf","ccfl_sf","Rc_sf"]) real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids - $:DECLARE(create=["icfl_max_loc","icfl_max_glb","vcfl_max_loc","vcfl_max_glb"]) - $:DECLARE(create=["ccfl_max_loc","ccfl_max_glb","Rc_min_loc","Rc_min_glb"]) + $:GPU_DECLARE(create=["icfl_max_loc","icfl_max_glb","vcfl_max_loc","vcfl_max_glb"]) + $:GPU_DECLARE(create=["ccfl_max_loc","ccfl_max_glb","Rc_min_loc","Rc_min_glb"]) !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps !> @{ @@ -279,7 +279,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - $:PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m @@ -303,10 +303,10 @@ contains ! Determining local stability criteria extrema at current time-step #ifdef _CRAYFTN - $:UPDATE(host=["icfl_sf"]) + $:GPU_UPDATE(host=["icfl_sf"]) if (viscous) then - $:UPDATE(host=["vcfl_sf","Rc_sf"]) + $:GPU_UPDATE(host=["vcfl_sf","Rc_sf"]) end if icfl_max_loc = maxval(icfl_sf) @@ -527,7 +527,7 @@ contains if (prim_vars_wrt .or. (n == 0 .and. p == 0)) then call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwint) do i = 1, sys_size - $:UPDATE(host=["q_prim_vf(i)%sf(:,:,:)"]) + $:GPU_UPDATE(host=["q_prim_vf(i)%sf(:,:,:)"]) end do ! q_prim_vf(bubxb) stores the value of nb needed in riemann solvers, so replace with true primitive value (=1._wp) if (qbmm) then diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index c8170d39a1..b0614f880a 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -47,12 +47,12 @@ module m_fftw !! Filtered complex data in Fourier space #if defined(MFC_OpenACC) - $:DECLARE(create=["real_size","cmplx_size","x_size","batch_size","Nfq"]) + $:GPU_DECLARE(create=["real_size","cmplx_size","x_size","batch_size","Nfq"]) real(dp), allocatable, target :: data_real_gpu(:) complex(dp), allocatable, target :: data_cmplx_gpu(:) complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:) - $:DECLARE(create=["data_real_gpu","data_cmplx_gpu","data_fltr_cmplx_gpu"]) + $:GPU_DECLARE(create=["data_real_gpu","data_cmplx_gpu","data_fltr_cmplx_gpu"]) #if defined(__PGI) integer :: fwd_plan_gpu, bwd_plan_gpu @@ -91,7 +91,7 @@ contains iembed(1) = 0 oembed(1) = 0 !$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq) - $:UPDATE(device=["real_size","cmplx_size","x_size","sys_size","batch_size"]) + $:GPU_UPDATE(device=["real_size","cmplx_size","x_size","sys_size","batch_size"]) #else ! Allocate input and output DFT data sizes fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) @@ -140,7 +140,7 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_OpenACC) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -149,7 +149,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -172,9 +172,9 @@ contains #endif !$acc end host_data Nfq = 3 - $:UPDATE(device=["Nfq"]) + $:GPU_UPDATE(device=["Nfq"]) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -192,7 +192,7 @@ contains #endif !$acc end host_data - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -204,7 +204,7 @@ contains do i = 1, fourier_rings - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -213,7 +213,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3, firstprivate=["i"]) + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p @@ -232,9 +232,9 @@ contains !$acc end host_data Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:UPDATE(device=["Nfq"]) + $:GPU_UPDATE(device=["Nfq"]) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -252,7 +252,7 @@ contains #endif !$acc end host_data - $:PARALLEL_LOOP(collapse=3, firstprivate=["i"]) + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate=["i"]) do k = 1, sys_size do j = 0, m do l = 0, p diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index e2e14286d7..a91b2d49b6 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -54,7 +54,7 @@ module m_global_parameters logical :: cyl_coord integer :: grid_geometry !> @} - $:DECLARE(create=["cyl_coord","grid_geometry"]) + $:GPU_DECLARE(create=["cyl_coord","grid_geometry"]) !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ @@ -77,7 +77,7 @@ module m_global_parameters real(wp) :: dt !< Size of the time-step - $:DECLARE(create=["x_cb","y_cb","z_cb","x_cc","y_cc","z_cc","dx","dy","dz","dt","m","n","p"]) + $:GPU_DECLARE(create=["x_cb","y_cb","z_cb","x_cc","y_cc","z_cc","dx","dy","dz","dt","m","n","p"]) !> @name Starting time-step iteration, stopping time-step iteration and the number !! of time-step iterations between successive solution backups, respectively @@ -91,7 +91,7 @@ module m_global_parameters real(wp) :: t_stop, t_save, cfl_target integer :: n_start !> @} - $:DECLARE(create=["cfl_target"]) + $:GPU_DECLARE(create=["cfl_target"]) logical :: cfl_adap_dt, cfl_const_dt, cfl_dt @@ -159,7 +159,7 @@ module m_global_parameters logical :: bulk_stress !< Bulk stresses logical :: cont_damage !< Continuum damage modeling - $:DECLARE(create=["chemistry"]) + $:GPU_DECLARE(create=["chemistry"]) logical :: bodyForces logical :: bf_x, bf_y, bf_z !< body force toggle in three directions @@ -170,27 +170,27 @@ module m_global_parameters #:endfor #:endfor real(wp), dimension(3) :: accel_bf - $:DECLARE(create=["accel_bf"]) + $:GPU_DECLARE(create=["accel_bf"]) integer :: cpu_start, cpu_end, cpu_rate #:if not MFC_CASE_OPTIMIZATION - $:DECLARE(create=["num_dims","num_vels","weno_polyn","weno_order"]) - $:DECLARE(create=["weno_num_stencils","num_fluids","wenojs"]) - $:DECLARE(create=["mapped_weno", "wenoz","teno","wenoz_q","mhd","relativity"]) + $:GPU_DECLARE(create=["num_dims","num_vels","weno_polyn","weno_order"]) + $:GPU_DECLARE(create=["weno_num_stencils","num_fluids","wenojs"]) + $:GPU_DECLARE(create=["mapped_weno", "wenoz","teno","wenoz_q","mhd","relativity"]) #:endif - $:DECLARE(create=["mpp_lim","model_eqns","mixture_err","alt_soundspeed"]) - $:DECLARE(create=["avg_state","mp_weno","weno_eps","teno_CT","hypoelasticity"]) - $:DECLARE(create=["hyperelasticity","hyper_model","elasticity","low_Mach"]) - $:DECLARE(create=["viscous","shear_stress","bulk_stress","cont_damage"]) + $:GPU_DECLARE(create=["mpp_lim","model_eqns","mixture_err","alt_soundspeed"]) + $:GPU_DECLARE(create=["avg_state","mp_weno","weno_eps","teno_CT","hypoelasticity"]) + $:GPU_DECLARE(create=["hyperelasticity","hyper_model","elasticity","low_Mach"]) + $:GPU_DECLARE(create=["viscous","shear_stress","bulk_stress","cont_damage"]) logical :: relax !< activate phase change integer :: relax_model !< Relaxation model real(wp) :: palpha_eps !< trigger parameter for the p relaxation procedure, phase change model real(wp) :: ptgalpha_eps !< trigger parameter for the pTg relaxation procedure, phase change model - $:DECLARE(create=["relax", "relax_model", "palpha_eps","ptgalpha_eps"]) + $:GPU_DECLARE(create=["relax", "relax_model", "palpha_eps","ptgalpha_eps"]) integer :: num_bc_patches logical :: bc_io @@ -200,9 +200,9 @@ module m_global_parameters !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} - $:DECLARE(create=["bc_x%vb1", "bc_x%vb2", "bc_x%vb3", "bc_x%ve1", "bc_x%ve2", "bc_x%ve3"]) - $:DECLARE(create=["bc_y%vb1", "bc_y%vb2", "bc_y%vb3", "bc_y%ve1", "bc_y%ve2", "bc_y%ve3"]) - $:DECLARE(create=["bc_z%vb1", "bc_z%vb2", "bc_z%vb3", "bc_z%ve1", "bc_z%ve2", "bc_z%ve3"]) + $:GPU_DECLARE(create=["bc_x%vb1", "bc_x%vb2", "bc_x%vb3", "bc_x%ve1", "bc_x%ve2", "bc_x%ve3"]) + $:GPU_DECLARE(create=["bc_y%vb1", "bc_y%vb2", "bc_y%vb3", "bc_y%ve1", "bc_y%ve2", "bc_y%ve3"]) + $:GPU_DECLARE(create=["bc_z%vb1", "bc_z%vb2", "bc_z%vb3", "bc_z%ve1", "bc_z%ve2", "bc_z%ve3"]) type(bounds_info) :: x_domain, y_domain, z_domain real(wp) :: x_a, y_a, z_a @@ -256,20 +256,20 @@ module m_global_parameters integer :: c_idx !< Index of color function integer :: damage_idx !< Index of damage state variable (D) for continuum damage model !> @} - $:DECLARE(create=["sys_size","E_idx","n_idx","bub_idx","alf_idx","gamma_idx"]) - $:DECLARE(create=["pi_inf_idx","B_idx","stress_idx","xi_idx","b_size"]) - $:DECLARE(create=["tensor_size","species_idx","c_idx"]) + $:GPU_DECLARE(create=["sys_size","E_idx","n_idx","bub_idx","alf_idx","gamma_idx"]) + $:GPU_DECLARE(create=["pi_inf_idx","B_idx","stress_idx","xi_idx","b_size"]) + $:GPU_DECLARE(create=["tensor_size","species_idx","c_idx"]) ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). ! Stands for "InDices With INTerior". type(int_bounds_info) :: idwint(1:3) - $:DECLARE(create=["idwint"]) + $:GPU_DECLARE(create=["idwint"]) ! Cell Indices for the entire (local) domain. In simulation and post_process, ! this includes the buffer region. idwbuff and idwint are the same otherwise. ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) - $:DECLARE(create=["idwbuff"]) + $:GPU_DECLARE(create=["idwbuff"]) !> @name The number of fluids, along with their identifying indexes, respectively, !! for which viscous effects, e.g. the shear and/or the volume Reynolds (Re) @@ -279,7 +279,7 @@ module m_global_parameters integer, allocatable, dimension(:, :) :: Re_idx !> @} - $:DECLARE(create=["Re_size","Re_idx"]) + $:GPU_DECLARE(create=["Re_size","Re_idx"]) ! The WENO average (WA) flag regulates whether the calculation of any cell- ! average spatial derivatives is carried out in each cell by utilizing the @@ -290,7 +290,7 @@ module m_global_parameters real(wp) :: wa_flg !> @{ - $:DECLARE(create=["wa_flg"]) + $:GPU_DECLARE(create=["wa_flg"]) !> @name The coordinate direction indexes and flags (flg), respectively, for which !! the configurations will be determined with respect to a working direction @@ -302,14 +302,14 @@ module m_global_parameters integer, dimension(3) :: dir_idx_tau !!used for hypoelasticity=true !> @} - $:DECLARE(create=["dir_idx","dir_flg","dir_idx_tau"]) + $:GPU_DECLARE(create=["dir_idx","dir_flg","dir_idx_tau"]) integer :: buff_size !< !! The number of cells that are necessary to be able to store enough boundary !! conditions data to march the solution in the physical computational domain !! to the next time-step. - $:DECLARE(create=["buff_size"]) + $:GPU_DECLARE(create=["buff_size"]) integer :: shear_num !! Number of shear stress components integer, dimension(3) :: shear_indices !< @@ -320,7 +320,7 @@ module m_global_parameters !! Indices of shear stress components to reflect for boundary conditions. !! Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, [indices]) - $:DECLARE(create=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) + $:GPU_DECLARE(create=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) ! END: Simulation Algorithm Parameters @@ -340,7 +340,7 @@ module m_global_parameters !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, !! it is a measure of the half-size of the finite-difference stencil for the !! selected order of accuracy. - $:DECLARE(create=["fd_order","fd_number"]) + $:GPU_DECLARE(create=["fd_order","fd_number"]) logical :: probe_wrt logical :: integral_wrt @@ -353,7 +353,7 @@ module m_global_parameters !> @{ real(wp) :: rhoref, pref !> @} - $:DECLARE(create=["rhoref","pref"]) + $:GPU_DECLARE(create=["rhoref","pref"]) !> @name Immersed Boundaries !> @{ @@ -368,7 +368,7 @@ module m_global_parameters !! the maximum allowable number of patches, num_patches_max, may be changed !! in the module m_derived_types.f90. - $:DECLARE(create=["ib","num_ibs","patch_ib"]) + $:GPU_DECLARE(create=["ib","num_ibs","patch_ib"]) !> @} !> @name Bubble modeling @@ -383,31 +383,31 @@ module m_global_parameters real(wp) :: Ca !< Cavitation number real(wp) :: Web !< Weber number real(wp) :: Re_inv !< Inverse Reynolds number - $:DECLARE(create=["R0ref","Ca","Web","Re_inv"]) + $:GPU_DECLARE(create=["R0ref","Ca","Web","Re_inv"]) real(wp), dimension(:), allocatable :: weight !< Simpson quadrature weights real(wp), dimension(:), allocatable :: R0 !< Bubble sizes real(wp), dimension(:), allocatable :: V0 !< Bubble velocities - $:DECLARE(create=["weight","R0","V0"]) + $:GPU_DECLARE(create=["weight","R0","V0"]) logical :: bubbles_euler !< Bubbles euler on/off logical :: polytropic !< Polytropic switch logical :: polydisperse !< Polydisperse bubbles - $:DECLARE(create=["bubbles_euler","polytropic","polydisperse"]) + $:GPU_DECLARE(create=["bubbles_euler","polytropic","polydisperse"]) logical :: adv_n !< Solve the number density equation and compute alpha from number density logical :: adap_dt !< Adaptive step size control real(wp) :: adap_dt_tol !< Tolerance to control adaptive step size - $:DECLARE(create=["adv_n","adap_dt","adap_dt_tol"]) + $:GPU_DECLARE(create=["adv_n","adap_dt","adap_dt_tol"]) integer :: bubble_model !< Gilmore or Keller--Miksis bubble model integer :: thermal !< Thermal behavior. 1 = adiabatic, 2 = isotherm, 3 = transfer - $:DECLARE(create=["bubble_model","thermal"]) + $:GPU_DECLARE(create=["bubble_model","thermal"]) real(wp), allocatable, dimension(:, :, :) :: ptil !< Pressure modification real(wp) :: poly_sigma !< log normal sigma for polydisperse PDF - $:DECLARE(create=["ptil", "poly_sigma"]) + $:GPU_DECLARE(create=["ptil", "poly_sigma"]) logical :: qbmm !< Quadrature moment method integer, parameter :: nmom = 6 !< Number of carried moments per R0 location @@ -416,39 +416,39 @@ module m_global_parameters integer :: R0_type real(wp) :: pi_fac !< Factor for artificial pi_inf - $:DECLARE(create=["qbmm", "nmomsp","nmomtot","R0_type","pi_fac"]) + $:GPU_DECLARE(create=["qbmm", "nmomsp","nmomtot","R0_type","pi_fac"]) #:if not MFC_CASE_OPTIMIZATION - $:DECLARE(create=["nb"]) + $:GPU_DECLARE(create=["nb"]) #:endif type(scalar_field), allocatable, dimension(:) :: mom_sp type(scalar_field), allocatable, dimension(:, :, :) :: mom_3d - $:DECLARE(create=["mom_sp","mom_3d"]) + $:GPU_DECLARE(create=["mom_sp","mom_3d"]) !> @} type(chemistry_parameters) :: chem_params - $:DECLARE(create=["chem_params"]) + $:GPU_DECLARE(create=["chem_params"]) !> @name Physical bubble parameters (see Ando 2010, Preston 2007) !> @{ real(wp) :: R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v - $:DECLARE(create=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw",]) - $:DECLARE(create=["pv","M_n", "M_v","k_vl","k_nl","cp_n","cp_v"]) + $:GPU_DECLARE(create=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw",]) + $:GPU_DECLARE(create=["pv","M_n", "M_v","k_vl","k_nl","cp_n","cp_v"]) real(wp), dimension(:), allocatable :: k_n, k_v, pb0, mass_n0, mass_v0, Pe_T real(wp), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN - $:DECLARE(create=["k_n","k_v","pb0","mass_n0","mass_v0","Pe_T"]) - $:DECLARE(create=["Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN"]) + $:GPU_DECLARE(create=["k_n","k_v","pb0","mass_n0","mass_v0","Pe_T"]) + $:GPU_DECLARE(create=["Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN"]) real(wp) :: mul0, ss, gamma_v, mu_v real(wp) :: gamma_m, gamma_n, mu_n real(wp) :: gam !> @} - $:DECLARE(create=["mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) + $:GPU_DECLARE(create=["mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) !> @name Acoustic acoustic_source parameters !> @{ @@ -456,14 +456,14 @@ module m_global_parameters type(acoustic_parameters), dimension(num_probes_max) :: acoustic !< Acoustic source parameters integer :: num_source !< Number of acoustic sources !> @} - $:DECLARE(create=["acoustic_source","acoustic","num_source"]) + $:GPU_DECLARE(create=["acoustic_source","acoustic","num_source"]) !> @name Surface tension parameters !> @{ real(wp) :: sigma logical :: surface_tension - $:DECLARE(create=["sigma","surface_tension"]) + $:GPU_DECLARE(create=["sigma","surface_tension"]) !> @} integer :: momxb, momxe @@ -474,13 +474,13 @@ module m_global_parameters integer :: strxb, strxe integer :: chemxb, chemxe integer :: xibeg, xiend - $:DECLARE(create=["momxb","momxe","advxb","advxe","contxb","contxe"]) - $:DECLARE(create=["intxb","intxe", "bubxb","bubxe"]) - $:DECLARE(create=["strxb","strxe","chemxb","chemxe"]) - $:DECLARE(create=["xibeg","xiend"]) + $:GPU_DECLARE(create=["momxb","momxe","advxb","advxe","contxb","contxe"]) + $:GPU_DECLARE(create=["intxb","intxe", "bubxb","bubxe"]) + $:GPU_DECLARE(create=["strxb","strxe","chemxb","chemxe"]) + $:GPU_DECLARE(create=["xibeg","xiend"]) real(wp), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - $:DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) + $:GPU_DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) real(wp) :: mytime !< Current simulation time real(wp) :: finaltime !< Final simulation time @@ -491,25 +491,25 @@ module m_global_parameters type(pres_field), allocatable, dimension(:) :: mv_ts - $:DECLARE(create=["pb_ts","mv_ts"]) + $:GPU_DECLARE(create=["pb_ts","mv_ts"]) !> @name lagrangian subgrid bubble parameters !> @{! logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters - $:DECLARE(create=["bubbles_lagrange","lag_params"]) + $:GPU_DECLARE(create=["bubbles_lagrange","lag_params"]) !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) logical :: powell !< Powell‐correction for div B = 0 - $:DECLARE(create=["Bx0","powell"]) + $:GPU_DECLARE(create=["Bx0","powell"]) !> @name Continuum damage model parameters !> @{! real(wp) :: tau_star !< Stress threshold for continuum damage modeling real(wp) :: cont_damage_s !< Exponent s for continuum damage modeling real(wp) :: alpha_bar !< Damage rate factor for continuum damage modeling - $:DECLARE(create=["tau_star","cont_damage_s","alpha_bar"]) + $:GPU_DECLARE(create=["tau_star","cont_damage_s","alpha_bar"]) !> @} contains @@ -802,10 +802,10 @@ contains else weno_num_stencils = weno_polyn end if - $:UPDATE(device=["weno_polyn"]) - $:UPDATE(device=["weno_num_stencils"]) - $:UPDATE(device=["nb"]) - $:UPDATE(device=["num_dims","num_vels","num_fluids"]) + $:GPU_UPDATE(device=["weno_polyn"]) + $:GPU_UPDATE(device=["weno_num_stencils"]) + $:GPU_UPDATE(device=["nb"]) + $:GPU_UPDATE(device=["num_dims","num_vels","num_fluids"]) #:endif ! Initializing the number of fluids for which viscous effects will @@ -1041,7 +1041,7 @@ contains if (Re_size(1) > 0._wp) shear_stress = .true. if (Re_size(2) > 0._wp) bulk_stress = .true. - $:UPDATE(device=["Re_size","viscous","shear_stress","bulk_stress"]) + $:GPU_UPDATE(device=["Re_size","viscous","shear_stress","bulk_stress"]) ! Bookkeeping the indexes of any viscous fluids and any pairs of ! fluids whose interface will support effects of surface tension @@ -1097,7 +1097,7 @@ contains ! y-dir: flip tau_xy and tau_yz ! z-dir: flip tau_xz and tau_yz end if - $:UPDATE(device=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) + $:GPU_UPDATE(device=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) end if if (hyperelasticity) then @@ -1164,7 +1164,7 @@ contains ! cell-boundary values or otherwise, the unaltered left and right, ! WENO-reconstructed, cell-boundary values wa_flg = 0._wp; if (weno_avg) wa_flg = 1._wp - $:UPDATE(device=["wa_flg"]) + $:GPU_UPDATE(device=["wa_flg"]) ! Resort to default WENO-JS if no other WENO scheme is selected #:if not MFC_CASE_OPTIMIZATION @@ -1174,7 +1174,7 @@ contains if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) Np = 0 - $:UPDATE(device=["Re_size"]) + $:GPU_UPDATE(device=["Re_size"]) if (elasticity) then fd_number = max(1, fd_order/2) @@ -1192,7 +1192,7 @@ contains idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & num_dims) - $:UPDATE(device=["idwint", "idwbuff"]) + $:GPU_UPDATE(device=["idwint", "idwbuff"]) ! Configuring Coordinate Direction Indexes if (bubbles_euler) then @@ -1202,7 +1202,7 @@ contains & idwbuff(3)%beg:idwbuff(3)%end)) end if - $:UPDATE(device=["fd_order", "fd_number"]) + $:GPU_UPDATE(device=["fd_order", "fd_number"]) if (cyl_coord .neqv. .true.) then ! Cartesian grid grid_geometry = 1 @@ -1229,17 +1229,17 @@ contains chemxb = species_idx%beg chemxe = species_idx%end - $:UPDATE(device=["momxb","momxe","advxb","advxe","contxb","contxe", & + $:GPU_UPDATE(device=["momxb","momxe","advxb","advxe","contxb","contxe", & & "bubxb","bubxe","intxb","intxe","sys_size","buff_size","E_idx", & & "alf_idx","n_idx","adv_n","adap_dt","pi_fac","strxb","strxe", & & "chemxb","chemxe","c_idx"]) - $:UPDATE(device=["b_size","xibeg","xiend","tensor_size"]) + $:GPU_UPDATE(device=["b_size","xibeg","xiend","tensor_size"]) - $:UPDATE(device=["species_idx"]) - $:UPDATE(device=["cfl_target","m","n","p"]) + $:GPU_UPDATE(device=["species_idx"]) + $:GPU_UPDATE(device=["cfl_target","m","n","p"]) - $:UPDATE(device=["alt_soundspeed","acoustic_source","num_source"]) - $:UPDATE(device=["dt","sys_size","buff_size","pref","rhoref", & + $:GPU_UPDATE(device=["alt_soundspeed","acoustic_source","num_source"]) + $:GPU_UPDATE(device=["dt","sys_size","buff_size","pref","rhoref", & & "gamma_idx","pi_inf_idx","E_idx","alf_idx","stress_idx", & & "mpp_lim","bubbles_euler","hypoelasticity","alt_soundspeed", & & "avg_state","num_fluids","model_eqns","num_dims","num_vels", & @@ -1247,14 +1247,14 @@ contains & "teno_CT","hyperelasticity","hyper_model","elasticity","xi_idx", & & "B_idx","low_Mach"]) - $:UPDATE(device=["Bx0", "powell"]) + $:GPU_UPDATE(device=["Bx0", "powell"]) - $:UPDATE(device=["cont_damage","tau_star","cont_damage_s","alpha_bar"]) + $:GPU_UPDATE(device=["cont_damage","tau_star","cont_damage_s","alpha_bar"]) #:if not MFC_CASE_OPTIMIZATION - $:UPDATE(device=["wenojs","mapped_weno","wenoz","teno"]) - $:UPDATE(device=["wenoz_q"]) - $:UPDATE(device=["mhd", "relativity"]) + $:GPU_UPDATE(device=["wenojs","mapped_weno","wenoz","teno"]) + $:GPU_UPDATE(device=["wenoz_q"]) + $:GPU_UPDATE(device=["mhd", "relativity"]) #:endif !$acc enter data copyin(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma) diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 994412a567..dd232a7cd8 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -27,14 +27,14 @@ module m_hyperelastic !! The btensor at the cell-interior Gaussian quadrature points. !! These tensor is needed to be calculated once and make the code DRY. type(vector_field) :: btensor !< - $:DECLARE(create=["btensor"]) + $:GPU_DECLARE(create=["btensor"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x real(wp), allocatable, dimension(:, :) :: fd_coeff_y real(wp), allocatable, dimension(:, :) :: fd_coeff_z - $:DECLARE(create=["fd_coeff_x","fd_coeff_y", "fd_coeff_z"]) + $:GPU_DECLARE(create=["fd_coeff_x","fd_coeff_y", "fd_coeff_z"]) real(wp), allocatable, dimension(:) :: Gs - $:DECLARE(create=["Gs"]) + $:GPU_DECLARE(create=["Gs"]) contains @@ -56,11 +56,11 @@ contains @:ACC_SETUP_VFs(btensor) @:ALLOCATE(Gs(1:num_fluids)) - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:UPDATE(device=["Gs"]) + $:GPU_UPDATE(device=["Gs"]) @:ALLOCATE(fd_coeff_x(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -73,16 +73,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_x"]) + $:GPU_UPDATE(device=["fd_coeff_x"]) if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_y"]) + $:GPU_UPDATE(device=["fd_coeff_y"]) end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_z"]) + $:GPU_UPDATE(device=["fd_coeff_z"]) end if end subroutine s_initialize_hyperelastic_module @@ -107,12 +107,12 @@ contains real(wp) :: G integer :: j, k, l, i, r - $:PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & "gamma", "pi_inf", "qv", "G", "Re", "tensora", "tensorb"]) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -125,7 +125,7 @@ contains !if ( G <= verysmall ) G_K = 0_wp if (G > verysmall) then - $:LOOP() + $:GPU_LOOP() do i = 1, tensor_size tensora(i) = 0_wp end do @@ -134,7 +134,7 @@ contains ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number ! derivatives in the x-direction tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) @@ -168,7 +168,7 @@ contains if (tensorb(tensor_size) > verysmall) then ! STEP 2c: computing the inverse of grad_xi tensor = F ! tensorb is the adjoint, tensora becomes F - $:LOOP() + $:GPU_LOOP() do i = 1, tensor_size - 1 tensora(i) = tensorb(i)/tensorb(tensor_size) end do @@ -199,7 +199,7 @@ contains q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & G*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma ! STEP 5c: updating the Cauchy stress conservative scalar field - $:LOOP() + $:GPU_LOOP() do i = 1, b_size - 1 q_cons_vf(strxb + i - 1)%sf(j, k, l) = & rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) @@ -221,7 +221,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_neoHookean_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:ROUTINE() + $:GPU_ROUTINE() type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G @@ -240,7 +240,7 @@ contains #:endfor ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - $:LOOP() + $:GPU_LOOP() do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) @@ -260,7 +260,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:ROUTINE() + $:GPU_ROUTINE() type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G @@ -281,7 +281,7 @@ contains ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - $:LOOP() + $:GPU_LOOP() do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 16c3008687..73f6383f69 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -21,20 +21,20 @@ module m_hypoelastic s_compute_damage_state real(wp), allocatable, dimension(:) :: Gs - $:DECLARE(create=["Gs"]) + $:GPU_DECLARE(create=["Gs"]) real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) + $:GPU_DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) real(wp), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field - $:DECLARE(create=["rho_K_field","G_K_field"]) + $:GPU_DECLARE(create=["rho_K_field","G_K_field"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - $:DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) + $:GPU_DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) contains @@ -56,7 +56,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:UPDATE(device=["Gs"]) + $:GPU_UPDATE(device=["Gs"]) @:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -69,16 +69,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_x_h"]) + $:GPU_UPDATE(device=["fd_coeff_x_h"]) if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_y_h"]) + $:GPU_UPDATE(device=["fd_coeff_y_h"]) end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, & fd_number, fd_order) - $:UPDATE(device=["fd_coeff_z_h"]) + $:GPU_UPDATE(device=["fd_coeff_z_h"]) end if end subroutine s_initialize_hypoelastic_module @@ -105,7 +105,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -115,11 +115,11 @@ contains end do !$acc end parallel loop - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number du_dx(k, l, q) = du_dx(k, l, q) & + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) @@ -131,7 +131,7 @@ contains !$acc end parallel loop if (ndirs > 1) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -141,11 +141,11 @@ contains end do !$acc end parallel loop - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number du_dy(k, l, q) = du_dy(k, l, q) & + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) @@ -162,7 +162,7 @@ contains ! 3D if (ndirs == 3) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -173,11 +173,11 @@ contains end do !$acc end parallel loop - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number du_dz(k, l, q) = du_dz(k, l, q) & + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) @@ -197,7 +197,7 @@ contains end if end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -221,7 +221,7 @@ contains end do ! apply rhs source term to elastic stress equation - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -235,7 +235,7 @@ contains end do elseif (idir == 2) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -270,7 +270,7 @@ contains end do elseif (idir == 3) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -338,7 +338,7 @@ contains if (cyl_coord .and. idir == 2) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -400,13 +400,13 @@ contains if (n == 0) then l = 0; q = 0 - $:PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP() do k = 0, m rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s end do elseif (p == 0) then q = 0 - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, n do k = 0, m ! Maximum principal stress @@ -420,7 +420,7 @@ contains end do end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1db453b8a0..bc9a9e401e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -36,15 +36,15 @@ module m_ibm type(integer_field), public :: ib_markers type(levelset_field), public :: levelset type(levelset_norm_field), public :: levelset_norm - $:DECLARE(create=["ib_markers","levelset","levelset_norm"]) + $:GPU_DECLARE(create=["ib_markers","levelset","levelset_norm"]) type(ghost_point), dimension(:), allocatable :: ghost_points type(ghost_point), dimension(:), allocatable :: inner_points - $:DECLARE(create=["ghost_points","inner_points"]) + $:GPU_DECLARE(create=["ghost_points","inner_points"]) integer :: num_gps !< Number of ghost points integer :: num_inner_gps !< Number of ghost points - $:DECLARE(create=["gp_layers","num_gps","num_inner_gps"]) + $:GPU_DECLARE(create=["gp_layers","num_gps","num_inner_gps"]) contains @@ -81,31 +81,31 @@ contains integer :: i, j, k - $:UPDATE(device=["ib_markers%sf"]) - $:UPDATE(device=["levelset%sf"]) - $:UPDATE(device=["levelset_norm%sf"]) + $:GPU_UPDATE(device=["ib_markers%sf"]) + $:GPU_UPDATE(device=["levelset%sf"]) + $:GPU_UPDATE(device=["levelset_norm%sf"]) ! Get neighboring IB variables from other processors call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) - $:UPDATE(host=["ib_markers%sf"]) + $:GPU_UPDATE(host=["ib_markers%sf"]) call s_find_num_ghost_points(num_gps, num_inner_gps) - $:UPDATE(device=["num_gps", "num_inner_gps"]) + $:GPU_UPDATE(device=["num_gps", "num_inner_gps"]) @:ALLOCATE(ghost_points(1:num_gps)) @:ALLOCATE(inner_points(1:num_inner_gps)) !$acc enter data copyin(ghost_points, inner_points) call s_find_ghost_points(ghost_points, inner_points) - $:UPDATE(device=["ghost_points", "inner_points"]) + $:GPU_UPDATE(device=["ghost_points", "inner_points"]) call s_compute_image_points(ghost_points, levelset, levelset_norm) - $:UPDATE(device=["ghost_points"]) + $:GPU_UPDATE(device=["ghost_points"]) call s_compute_interpolation_coeffs(ghost_points) - $:UPDATE(device=["ghost_points"]) + $:GPU_UPDATE(device=["ghost_points"]) end subroutine s_ibm_setup @@ -153,7 +153,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - $:PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & + $:GPU_PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & & "alpha_IP","pres_IP","vel_IP","vel_g","vel_norm_IP","r_IP", & & "v_IP","pb_IP","mv_IP","nmom_IP","presb_IP","massv_IP","rho", & & "gamma","pi_inf","Re_K","G_K","Gs","gp","innerp","norm","buf", & @@ -194,7 +194,7 @@ contains dyn_pres = 0._wp ! Set q_prim_vf params at GP so that mixture vars calculated properly - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -230,7 +230,7 @@ contains end if ! Set momentum - $:LOOP() + $:GPU_LOOP() do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -238,7 +238,7 @@ contains end do ! Set continuity and adv vars - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -292,7 +292,7 @@ contains end if if (model_eqns == 3) then - $:LOOP() + $:GPU_LOOP() do q = intxb, intxe q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + pi_infs(q - intxb + 1)) @@ -301,7 +301,7 @@ contains end do !Correct the state of the inner points in IBs - $:PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & + $:GPU_PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & & "alpha_IP","vel_g","rho","gamma","pi_inf","Re_K","innerp", & & "j","k","l","q"]) do i = 1, num_inner_gps @@ -320,7 +320,7 @@ contains physical_loc = [x_cc(j), y_cc(k), 0._wp] end if - $:LOOP() + $:GPU_LOOP() do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -335,7 +335,7 @@ contains dyn_pres = 0._wp - $:LOOP() + $:GPU_LOOP() do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -751,7 +751,7 @@ contains !> Function that uses the interpolation coefficients and the current state !! at the cell centers in order to estimate the state at the image point pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP) - $:ROUTINE() + $:GPU_ROUTINE() type(scalar_field), & dimension(sys_size), & intent(IN) :: q_prim_vf !< Primitive Variables @@ -804,11 +804,11 @@ contains end if end if - $:LOOP() + $:GPU_LOOP() do i = i1, i2 - $:LOOP() + $:GPU_LOOP() do j = j1, j2 - $:LOOP() + $:GPU_LOOP() do k = k1, k2 coeff = gp%interp_coeffs(i - i1 + 1, j - j1 + 1, k - k1 + 1) @@ -816,13 +816,13 @@ contains pres_IP = pres_IP + coeff* & q_prim_vf(E_idx)%sf(i, j, k) - $:LOOP() + $:GPU_LOOP() do q = momxb, momxe vel_IP(q + 1 - momxb) = vel_IP(q + 1 - momxb) + coeff* & q_prim_vf(q)%sf(i, j, k) end do - $:LOOP() + $:GPU_LOOP() do l = contxb, contxe alpha_rho_IP(l) = alpha_rho_IP(l) + coeff* & q_prim_vf(l)%sf(i, j, k) @@ -835,7 +835,7 @@ contains end if if (bubbles_euler .and. .not. qbmm) then - $:LOOP() + $:GPU_LOOP() do l = 1, nb if (polytropic) then r_IP(l) = r_IP(l) + coeff*q_prim_vf(bubxb + (l - 1)*2)%sf(i, j, k) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 6a6821e602..88f8621dbd 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -25,12 +25,12 @@ module m_mhd real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) + $:GPU_DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - $:DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) + $:GPU_DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) contains @@ -53,12 +53,12 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, fd_number, fd_order) - $:UPDATE(device=["fd_coeff_x_h"]) + $:GPU_UPDATE(device=["fd_coeff_x_h"]) call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, fd_number, fd_order) - $:UPDATE(device=["fd_coeff_y_h"]) + $:GPU_UPDATE(device=["fd_coeff_y_h"]) if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, fd_number, fd_order) - $:UPDATE(device=["fd_coeff_z_h"]) + $:GPU_UPDATE(device=["fd_coeff_z_h"]) end if end subroutine s_initialize_mhd_powell_module @@ -77,22 +77,22 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - $:PARALLEL_LOOP(collapse=3, private=["v", "B"]) + $:GPU_PARALLEL_LOOP(collapse=3, private=["v", "B"]) do q = 0, p do l = 0, n do k = 0, m divB = 0._wp - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) end do - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) end do if (p > 0) then - $:LOOP() + $:GPU_LOOP() do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f271cb696e..c8e943e8d9 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -47,7 +47,7 @@ module m_mpi_proxy !> @{ integer, private :: err_code, ierr, v_size !> @} - $:DECLARE(create=["v_size"]) + $:GPU_DECLARE(create=["v_size"]) contains @@ -799,7 +799,7 @@ contains if (bc_x%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_x%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -835,7 +835,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send","ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send","ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -854,7 +854,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_x%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -889,7 +889,7 @@ contains !$acc wait else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -909,12 +909,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if #endif ! Unpacking buffer received from bc_x%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -930,7 +930,7 @@ contains if (bc_x%beg >= 0) then ! PBC at the end and beginning - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) ! Packing buffer to be sent to bc_x%beg do l = 0, p do k = 0, n @@ -967,7 +967,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) call MPI_SENDRECV( & ib_buff_send(0), & gp_layers*(n + 1)*(p + 1), & @@ -984,7 +984,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_x%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -1020,7 +1020,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) call MPI_SENDRECV( & ib_buff_send(0), & @@ -1038,11 +1038,11 @@ contains end if if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if ! Unpacking buffer received from bc_x%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, n do j = m + 1, m + gp_layers @@ -1062,7 +1062,7 @@ contains if (bc_y%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_y%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1099,7 +1099,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1118,7 +1118,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_y%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1155,7 +1155,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1175,12 +1175,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if #endif ! Unpacking buffer received from bc_y%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -1198,7 +1198,7 @@ contains if (bc_y%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_y%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1235,7 +1235,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1254,7 +1254,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_y%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1291,7 +1291,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1311,12 +1311,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if #endif ! Unpacking buffer received form bc_y%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, p do k = n + 1, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1336,7 +1336,7 @@ contains if (bc_z%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_z%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1374,7 +1374,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1393,7 +1393,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_z%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1430,7 +1430,7 @@ contains else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1450,12 +1450,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if #endif ! Unpacking buffer from bc_z%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1474,7 +1474,7 @@ contains if (bc_z%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_z%beg - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1510,7 +1510,7 @@ contains !$acc wait else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1529,7 +1529,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_z%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1566,7 +1566,7 @@ contains !$acc wait else #endif - $:UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host=["ib_buff_send"]) ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1586,12 +1586,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device=["ib_buff_recv"]) end if #endif ! Unpacking buffer received from bc_z%end - $:PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) do l = p + 1, p + gp_layers do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index dbc78a69d1..86ef374e1c 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -26,21 +26,21 @@ module m_qbmm private; public :: s_initialize_qbmm_module, s_mom_inv, s_coeff, s_compute_qbmm_rhs real(wp), allocatable, dimension(:, :, :, :, :) :: momrhs - $:DECLARE(create=["momrhs"]) + $:GPU_DECLARE(create=["momrhs"]) #:if MFC_CASE_OPTIMIZATION integer, parameter :: nterms = ${nterms}$ #:else integer :: nterms - $:DECLARE(create=["nterms"]) + $:GPU_DECLARE(create=["nterms"]) #:endif type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm - $:DECLARE(create=["is1_qbmm","is2_qbmm","is3_qbmm"]) + $:GPU_DECLARE(create=["is1_qbmm","is2_qbmm","is3_qbmm"]) integer, allocatable, dimension(:) :: bubrs integer, allocatable, dimension(:, :) :: bubmoms - $:DECLARE(create=["bubrs","bubmoms"]) + $:GPU_DECLARE(create=["bubrs","bubmoms"]) contains @@ -59,7 +59,7 @@ contains end if !$acc enter data copyin(nterms) - $:UPDATE(device=["nterms"]) + $:GPU_UPDATE(device=["nterms"]) #:endif @@ -393,7 +393,7 @@ contains end do end if - $:UPDATE(device=["momrhs"]) + $:GPU_UPDATE(device=["momrhs"]) @:ALLOCATE(bubrs(1:nb)) @:ALLOCATE(bubmoms(1:nb, 1:nmom)) @@ -401,14 +401,14 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - $:UPDATE(device=["bubrs"]) + $:GPU_UPDATE(device=["bubrs"]) do j = 1, nmom do i = 1, nb bubmoms(i, j) = bub_idx%moms(i, j) end do end do - $:UPDATE(device=["bubmoms"]) + $:GPU_UPDATE(device=["bubmoms"]) end subroutine s_initialize_qbmm_module @@ -434,7 +434,7 @@ contains end select if (.not. polytropic) then - $:PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) + $:GPU_PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) do i = 1, nb do q = 1, nnode do l = 0, p @@ -567,7 +567,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff_nonpoly #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -640,7 +640,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: pres, rho, c @@ -716,9 +716,9 @@ contains integer :: id1, id2, id3, i1, i2, j, q, r is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz - $:UPDATE(device=["is1_qbmm","is2_qbmm","is3_qbmm"]) + $:GPU_UPDATE(device=["is1_qbmm","is2_qbmm","is3_qbmm"]) - $:PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & "abscY", "wght_pb", "wght_mv", "wght_ht", "coeff", "ht", "r", "q", & "n_tait", "B_tait", "pres", "rho", "nbub", "c", "alf", "momsum", & "drdt", "drdt2", "chi_vw", "x_vw", "rho_mw", "k_mw", "T_bar", "grad_T"]) @@ -741,10 +741,10 @@ contains if (alf > small_alf) then nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) - $:LOOP() + $:GPU_LOOP() do q = 1, nb ! Gather moments for this bubble bin - $:LOOP() + $:GPU_LOOP() do r = 2, nmom moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) end do @@ -752,12 +752,12 @@ contains call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) if (polytropic) then - $:LOOP() + $:GPU_LOOP() do j = 1, nnode wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do else - $:LOOP() + $:GPU_LOOP() do j = 1, nnode chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) @@ -776,13 +776,13 @@ contains ! Compute change in moments due to bubble dynamics r = 1 - $:LOOP() + $:GPU_LOOP() do i2 = 0, 2 - $:LOOP() + $:GPU_LOOP() do i1 = 0, 2 if ((i1 + i2) <= 2) then momsum = 0._wp - $:LOOP() + $:GPU_LOOP() do j = 1, nterms select case (bubble_model) case (3) @@ -812,7 +812,7 @@ contains ! Compute change in pb and mv for non-polytropic model if (.not. polytropic) then - $:LOOP() + $:GPU_LOOP() do j = 1, nnode drdt = msum(2) drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) @@ -840,11 +840,11 @@ contains end if end if else - $:LOOP() + $:GPU_LOOP() do q = 1, nb - $:LOOP() + $:GPU_LOOP() do i1 = 0, 2 - $:LOOP() + $:GPU_LOOP() do i2 = 0, 2 moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp end do @@ -865,7 +865,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff @@ -881,7 +881,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(nmom), intent(in) :: momin real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY @@ -941,7 +941,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_hyqmom #else - $:ROUTINE() + $:GPU_ROUTINE() #endif real(wp), dimension(2), intent(inout) :: frho, fup real(wp), dimension(3), intent(in) :: fmom @@ -960,7 +960,7 @@ contains end subroutine s_hyqmom pure function f_quad(abscX, abscY, wght_in, q, r, s) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in real(wp), intent(in) :: q, r, s @@ -976,7 +976,7 @@ contains end function f_quad pure function f_quad2D(abscX, abscY, wght_in, pow) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in real(wp), dimension(3), intent(in) :: pow diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index e3023de18c..d87ac93811 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -76,13 +76,13 @@ module m_rhs !! conservative variables, which are located in q_cons_vf, at cell-interior !! Gaussian quadrature points (QP). type(vector_field) :: q_cons_qp !< - $:DECLARE(create=["q_cons_qp"]) + $:GPU_DECLARE(create=["q_cons_qp"]) !! The primitive variables at cell-interior Gaussian quadrature points. These !! are calculated from the conservative variables and gradient magnitude (GM) !! of the volume fractions, q_cons_qp and gm_alpha_qp, respectively. type(vector_field) :: q_prim_qp !< - $:DECLARE(create=["q_prim_qp"]) + $:GPU_DECLARE(create=["q_prim_qp"]) !> @name The first-order spatial derivatives of the primitive variables at cell- !! interior Gaussian quadrature points. These are WENO-reconstructed from @@ -91,7 +91,7 @@ module m_rhs !! of the primitive variables, located in qK_prim_n, where K = L or R. !> @{ type(vector_field), allocatable, dimension(:) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp - $:DECLARE(create=["dq_prim_dx_qp","dq_prim_dy_qp","dq_prim_dz_qp"]) + $:GPU_DECLARE(create=["dq_prim_dx_qp","dq_prim_dy_qp","dq_prim_dz_qp"]) !> @} !> @name The left and right WENO-reconstructed cell-boundary values of the cell- @@ -101,26 +101,26 @@ module m_rhs !> @{ type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n - $:DECLARE(create=["dqL_prim_dx_n","dqL_prim_dy_n","dqL_prim_dz_n"]) - $:DECLARE(create=["dqR_prim_dx_n","dqR_prim_dy_n","dqR_prim_dz_n"]) + $:GPU_DECLARE(create=["dqL_prim_dx_n","dqL_prim_dy_n","dqL_prim_dz_n"]) + $:GPU_DECLARE(create=["dqR_prim_dx_n","dqR_prim_dy_n","dqR_prim_dz_n"]) !> @} type(scalar_field), allocatable, dimension(:) :: tau_Re_vf - $:DECLARE(create=["tau_Re_vf"]) + $:GPU_DECLARE(create=["tau_Re_vf"]) type(vector_field) :: gm_alpha_qp !< !! The gradient magnitude of the volume fractions at cell-interior Gaussian !! quadrature points. gm_alpha_qp is calculated from individual first-order !! spatial derivatives located in dq_prim_ds_qp. - $:DECLARE(create=["gm_alpha_qp"]) + $:GPU_DECLARE(create=["gm_alpha_qp"]) !> @name The left and right WENO-reconstructed cell-boundary values of the cell- !! average gradient magnitude of volume fractions, located in gm_alpha_qp. !> @{ type(vector_field), allocatable, dimension(:) :: gm_alphaL_n type(vector_field), allocatable, dimension(:) :: gm_alphaR_n - $:DECLARE(create=["gm_alphaL_n","gm_alphaR_n"]) + $:GPU_DECLARE(create=["gm_alphaL_n","gm_alphaR_n"]) !> @} !> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical @@ -130,38 +130,38 @@ module m_rhs type(vector_field), allocatable, dimension(:) :: flux_n type(vector_field), allocatable, dimension(:) :: flux_src_n type(vector_field), allocatable, dimension(:) :: flux_gsrc_n - $:DECLARE(create=["flux_n","flux_src_n","flux_gsrc_n"]) + $:GPU_DECLARE(create=["flux_n","flux_src_n","flux_gsrc_n"]) !> @} type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim - $:DECLARE(create=["qL_prim","qR_prim"]) + $:GPU_DECLARE(create=["qL_prim","qR_prim"]) type(int_bounds_info) :: iv !< Vector field indical bounds - $:DECLARE(create=["iv"]) + $:GPU_DECLARE(create=["iv"]) !> @name Indical bounds in the x-, y- and z-directions !> @{ type(int_bounds_info) :: irx, iry, irz - $:DECLARE(create=["irx","iry","irz"]) + $:GPU_DECLARE(create=["irx","iry","irz"]) type(int_bounds_info) :: is1, is2, is3 - $:DECLARE(create=["is1","is2","is3"]) + $:GPU_DECLARE(create=["is1","is2","is3"]) !> @name Saved fluxes for testing !> @{ type(scalar_field) :: alf_sum !> @} - $:DECLARE(create=["alf_sum"]) + $:GPU_DECLARE(create=["alf_sum"]) real(wp), allocatable, dimension(:, :, :) :: blkmod1, blkmod2, alpha1, alpha2, Kterm real(wp), allocatable, dimension(:, :, :, :) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf real(wp), allocatable, dimension(:, :, :, :) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf - $:DECLARE(create=["blkmod1","blkmod2","alpha1","alpha2","Kterm"]) - $:DECLARE(create=["qL_rsx_vf","qL_rsy_vf","qL_rsz_vf","qR_rsx_vf","qR_rsy_vf","qR_rsz_vf"]) - $:DECLARE(create=["dqL_rsx_vf","dqL_rsy_vf","dqL_rsz_vf","dqR_rsx_vf","dqR_rsy_vf","dqR_rsz_vf"]) + $:GPU_DECLARE(create=["blkmod1","blkmod2","alpha1","alpha2","Kterm"]) + $:GPU_DECLARE(create=["qL_rsx_vf","qL_rsy_vf","qL_rsz_vf","qR_rsx_vf","qR_rsy_vf","qR_rsz_vf"]) + $:GPU_DECLARE(create=["dqL_rsx_vf","dqL_rsy_vf","dqL_rsz_vf","dqR_rsx_vf","dqR_rsy_vf","dqR_rsz_vf"]) real(wp), allocatable, dimension(:, :, :) :: nbub !< Bubble number density - $:DECLARE(create=["nbub"]) + $:GPU_DECLARE(create=["nbub"]) contains @@ -175,7 +175,7 @@ contains integer :: num_eqns_after_adv !$acc enter data copyin(idwbuff, idwbuff) - $:UPDATE(device=["idwbuff", "idwbuff"]) + $:GPU_UPDATE(device=["idwbuff", "idwbuff"]) @:ALLOCATE(q_cons_qp%vf(1:sys_size)) @:ALLOCATE(q_prim_qp%vf(1:sys_size)) @@ -569,7 +569,7 @@ contains call s_initialize_pressure_relaxation_module - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do id = 1, num_dims do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -609,7 +609,7 @@ contains call cpu_time(t_start) ! Association/Population of Working Variables - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -623,16 +623,16 @@ contains ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end alf_sum%sf(j, k, l) = 0._wp - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe - 1 alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) end do - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe - 1 q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & /alf_sum%sf(j, k, l) @@ -841,7 +841,7 @@ contains ! END: Dimensional Splitting Loop if (ib) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -905,7 +905,7 @@ contains ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -945,7 +945,7 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -979,7 +979,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do q_loop = 0, p do l_loop = 0, n @@ -994,7 +994,7 @@ contains end do if (model_eqns == 3) then - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & & "pressure_val","flux_face1","flux_face2"]) do q_loop = 0, p do l_loop = 0, n @@ -1024,7 +1024,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do l = 0, p do k = 0, n @@ -1039,7 +1039,7 @@ contains end do if (model_eqns == 3) then - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & & "pressure_val","flux_face1","flux_face2"]) do l = 0, p do k = 0, n @@ -1065,7 +1065,7 @@ contains end if if (cyl_coord) then - $:PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) do j = 1, sys_size do l = 0, p do k = 0, n @@ -1091,7 +1091,7 @@ contains end if if (grid_geometry == 3) then ! Cylindrical Coordinates - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","velocity_val", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","velocity_val", & & "flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p @@ -1107,7 +1107,7 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p do q = 0, n @@ -1121,7 +1121,7 @@ contains end do end do else ! Cartesian Coordinates - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) do j = 1, sys_size do k = 0, p do q = 0, n @@ -1137,7 +1137,7 @@ contains end if if (model_eqns == 3) then - $:PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & & "pressure_val","flux_face1","flux_face2"]) do k = 0, p do q = 0, n @@ -1182,7 +1182,7 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do q_idx = 0, p ! z_extent @@ -1201,7 +1201,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", "local_flux2"]) do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m @@ -1215,7 +1215,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds",& + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds",& "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1231,7 +1231,7 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m @@ -1249,7 +1249,7 @@ contains case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do l_idx = 0, p ! z_extent @@ -1268,7 +1268,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1287,7 +1287,7 @@ contains end if end do; end do; end do - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1307,7 +1307,7 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m @@ -1330,7 +1330,7 @@ contains end if if (use_standard_riemann) then - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do k_idx = 0, p ! z_extent @@ -1349,7 +1349,7 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1364,7 +1364,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & "local_q_cons_val", "local_k_term_val", & "local_term_coeff", "local_flux1", & "local_flux2"]) @@ -1380,7 +1380,7 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & + $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & & "local_term_coeff","local_flux1","local_flux2"]) do j_adv = advxb, advxe do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m @@ -1413,7 +1413,7 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1427,11 +1427,11 @@ contains end do end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & @@ -1445,7 +1445,7 @@ contains elseif (idir == 2) then ! y-direction if (surface_tension) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1477,10 +1477,10 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & @@ -1492,11 +1492,11 @@ contains end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1508,11 +1508,11 @@ contains end do else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1529,11 +1529,11 @@ contains if (cyl_coord) then if ((bc_y%beg == BC_REFLECTIVE) .or. (bc_y%beg == BC_AXIS)) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 1, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1545,10 +1545,10 @@ contains end do if (viscous) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & @@ -1559,11 +1559,11 @@ contains end if else - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1580,7 +1580,7 @@ contains elseif (idir == 3) then ! z-direction if (surface_tension) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1594,11 +1594,11 @@ contains end do end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & @@ -1610,7 +1610,7 @@ contains end do if (grid_geometry == 3) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1730,10 +1730,10 @@ contains end if - $:UPDATE(device=["is1","is2","is3","iv"]) + $:GPU_UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1746,7 +1746,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1759,7 +1759,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 6d0c7fb7d8..d81659c2b4 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -66,7 +66,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf - $:DECLARE(create=["flux_rsx_vf","flux_src_rsx_vf","flux_rsy_vf","flux_src_rsy_vf","flux_rsz_vf","flux_src_rsz_vf"]) + $:GPU_DECLARE(create=["flux_rsx_vf","flux_src_rsx_vf","flux_rsy_vf","flux_src_rsy_vf","flux_rsz_vf","flux_src_rsz_vf"]) !> @} !> The cell-boundary values of the geometrical source flux that are computed @@ -77,7 +77,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsz_vf !< - $:DECLARE(create=["flux_gsrc_rsx_vf","flux_gsrc_rsy_vf","flux_gsrc_rsz_vf"]) + $:GPU_DECLARE(create=["flux_gsrc_rsx_vf","flux_gsrc_rsy_vf","flux_gsrc_rsz_vf"]) !> @} ! The cell-boundary values of the velocity. vel_src_rs_vf is determined as @@ -86,17 +86,17 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsz_vf - $:DECLARE(create=["vel_src_rsx_vf","vel_src_rsy_vf","vel_src_rsz_vf"]) + $:GPU_DECLARE(create=["vel_src_rsx_vf","vel_src_rsy_vf","vel_src_rsz_vf"]) real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsz_vf - $:DECLARE(create=["mom_sp_rsx_vf","mom_sp_rsy_vf","mom_sp_rsz_vf"]) + $:GPU_DECLARE(create=["mom_sp_rsx_vf","mom_sp_rsy_vf","mom_sp_rsz_vf"]) real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsz_vf - $:DECLARE(create=["Re_avg_rsx_vf","Re_avg_rsy_vf","Re_avg_rsz_vf"]) + $:GPU_DECLARE(create=["Re_avg_rsx_vf","Re_avg_rsy_vf","Re_avg_rsz_vf"]) !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ @@ -104,13 +104,13 @@ module m_riemann_solvers type(int_bounds_info) :: isx, isy, isz !> @} - $:DECLARE(create=["is1","is2","is3","isx","isy","isz"]) + $:GPU_DECLARE(create=["is1","is2","is3","isx","isy","isz"]) real(wp), allocatable, dimension(:) :: Gs - $:DECLARE(create=["Gs"]) + $:GPU_DECLARE(create=["Gs"]) real(wp), allocatable, dimension(:, :) :: Res - $:DECLARE(create=["Res"]) + $:GPU_DECLARE(create=["Res"]) contains @@ -355,7 +355,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & "vel_L", "vel_R", "alpha_L", "alpha_R", "tau_e_L", "tau_e_R", & "G_L", "G_R", "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & "s_L", "s_R", "s_S", "Ys_L", "Ys_R", "xi_field_L", "xi_field_R", & @@ -366,13 +366,13 @@ contains do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end - $:LOOP() + $:GPU_LOOP() do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -380,13 +380,13 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -430,7 +430,7 @@ contains pres_mag%R = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) @@ -439,7 +439,7 @@ contains alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) @@ -449,7 +449,7 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -463,13 +463,13 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -479,13 +479,13 @@ contains end do - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_R(i) = alpha_R(Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -496,7 +496,7 @@ contains end if if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -593,7 +593,7 @@ contains if (hypoelasticity) then G_L = 0._wp; G_R = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) @@ -626,7 +626,7 @@ contains ! G_L = 0._wp ! G_R = 0._wp ! - ! $:LOOP() + ! $:GPU_LOOP() ! do i = 1, num_fluids ! G_L = G_L + alpha_L(i)*Gs(i) ! G_R = G_R + alpha_R(i)*Gs(i) @@ -635,17 +635,17 @@ contains ! if ((G_L > 1e-3_wp) .and. (G_R > 1e-3_wp)) then ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:LOOP() + ! $:GPU_LOOP() ! do i = 1, b_size-1 ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) ! end do - ! $:LOOP() + ! $:GPU_LOOP() ! do i = 1, b_size-1 ! tau_e_L(i) = 0_wp ! tau_e_R(i) = 0_wp ! end do - ! $:LOOP() + ! $:GPU_LOOP() ! do i = 1, num_dims ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) @@ -673,7 +673,7 @@ contains end if if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -752,7 +752,7 @@ contains ! Mass if (.not. relativity) then - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*alpha_rho_R(i)*vel_R(norm_dir) & @@ -762,7 +762,7 @@ contains /(s_M - s_P) end do elseif (relativity) then - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & @@ -843,7 +843,7 @@ contains + s_M*s_P*(cm%L(3) - cm%R(3))) & /(s_M - s_P) elseif (bubbles_euler) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -858,7 +858,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do else if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -874,7 +874,7 @@ contains /(s_M - s_P) end do else - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -970,7 +970,7 @@ contains end if ! Advection - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & (qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -995,7 +995,7 @@ contains !end if ! Div(U)? - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & (xi_M*(rho_L*vel_L(dir_idx(i))* & @@ -1016,7 +1016,7 @@ contains end if if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -1066,7 +1066,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:LOOP() + $:GPU_LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1075,7 +1075,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) ! Geometrical source of the void fraction(s) is zero - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1088,7 +1088,7 @@ contains (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - $:LOOP() + $:GPU_LOOP() do i = strxb, strxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1284,7 +1284,7 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - $:PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & "vel_K_Star", "Re_L", "Re_R", "rho_avg", "h_avg", & "gamma_avg", "s_L", "s_R", "s_S", "vel_avg_rms", & "alpha_L", "alpha_R", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & @@ -1300,7 +1300,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -1325,32 +1325,32 @@ contains alpha_R_sum = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -1367,13 +1367,13 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -1383,13 +1383,13 @@ contains end do - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -1405,18 +1405,18 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0_wp; G_R = 0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -1433,13 +1433,13 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0_wp; G_R = 0_wp; - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -1450,7 +1450,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:LOOP() + $:GPU_LOOP() do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -1474,7 +1474,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -1569,7 +1569,7 @@ contains ! COMPUTING FLUXES ! MASS FLUX. - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & @@ -1578,7 +1578,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & @@ -1594,7 +1594,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0_wp; - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -1612,7 +1612,7 @@ contains end if ! VOLUME FRACTION FLUX. - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & @@ -1620,7 +1620,7 @@ contains end do ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -1630,7 +1630,7 @@ contains ! INTERNAL ENERGIES ADVECTION FLUX. ! K-th pressure and velocity in preparation for the internal energy flux - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1_wp + gammas(i)))* & xi_L**(1_wp/gammas(i) + 1_wp) - pi_infs(i)/(1_wp + gammas(i)) - pres_L) + pres_L) + & @@ -1649,7 +1649,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -1659,7 +1659,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -1680,11 +1680,11 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:LOOP() + $:GPU_LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - $:LOOP() + $:GPU_LOOP() do i = intxb, intxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1692,7 +1692,7 @@ contains flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star ! Geometrical source of the void fraction(s) is zero - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0_wp end do @@ -1700,7 +1700,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:LOOP() + $:GPU_LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0_wp end do @@ -1717,7 +1717,7 @@ contains elseif (model_eqns == 4) then !ME4 - $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & "alpha_rho_R", "vel_L", "vel_R", "alpha_L", "alpha_R", & "rho_avg", "h_avg", "gamma_avg", "s_L", "s_R", "s_S", & "vel_avg_rms", "nbub_L", "nbub_R", "ptilde_L", "ptilde_R"]) @@ -1725,26 +1725,26 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - $:LOOP() + $:GPU_LOOP() do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -1757,7 +1757,7 @@ contains gamma_L = 0._wp pi_inf_L = 0._wp qv_L = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -1769,7 +1769,7 @@ contains gamma_R = 0._wp pi_inf_R = 0._wp qv_R = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_R = rho_R + alpha_rho_R(i) gamma_R = gamma_R + alpha_R(i)*gammas(i) @@ -1844,7 +1844,7 @@ contains xi_M = (5e-1_wp + sign(5e-1_wp, s_S)) xi_P = (5e-1_wp - sign(5e-1_wp, s_S)) - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*alpha_rho_L(i) & @@ -1855,7 +1855,7 @@ contains ! Momentum flux. ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -1874,7 +1874,7 @@ contains if (bubbles_euler) then ! Put p_tilde in - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & @@ -1885,7 +1885,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - $:LOOP() + $:GPU_LOOP() do i = alf_idx, alf_idx !only advect the void fraction flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1895,7 +1895,7 @@ contains end do ! Source for volume fraction advection equation - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp @@ -1906,7 +1906,7 @@ contains ! Add advection flux for bubble variables if (bubbles_euler) then - $:LOOP() + $:GPU_LOOP() do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1921,7 +1921,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - $:LOOP() + $:GPU_LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1938,7 +1938,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1946,7 +1946,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:LOOP() + $:GPU_LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1970,7 +1970,7 @@ contains !$acc end parallel loop elseif (model_eqns == 2 .and. bubbles_euler) then - $:PARALLEL_LOOP(collapse=3, private=["R0_L", "R0_R", "V0_L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["R0_L", "R0_R", "V0_L", & "V0_R", "P0_L", "P0_R", "pbw_L", "pbw_R", "vel_L", & "vel_R", "rho_avg", "alpha_L", "alpha_R", "h_avg", & "gamma_avg", "s_L", "s_R", "s_S", "nbub_L", "nbub_R", & @@ -1980,7 +1980,7 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -1988,7 +1988,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2006,7 +2006,7 @@ contains ! Retain this in the refactor if (mpp_lim .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2014,7 +2014,7 @@ contains qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2034,7 +2034,7 @@ contains qv_R = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2042,7 +2042,7 @@ contains qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2058,13 +2058,13 @@ contains if (viscous) then if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_L(i) @@ -2074,13 +2074,13 @@ contains end do - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_R(i) @@ -2099,7 +2099,7 @@ contains H_R = (E_R + pres_R)/rho_R if (avg_state == 2) then - $:LOOP() + $:GPU_LOOP() do i = 1, nb R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) @@ -2119,7 +2119,7 @@ contains else nbub_L_denom = 0._wp nbub_R_denom = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, nb nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) @@ -2133,7 +2133,7 @@ contains nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - $:LOOP() + $:GPU_LOOP() do i = 1, nb if (.not. qbmm) then if (polytropic) then @@ -2166,7 +2166,7 @@ contains R3V2Lbar = 0._wp R3V2Rbar = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, nb PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) @@ -2201,7 +2201,7 @@ contains gamma_avg = 5e-1_wp*(gamma_L + gamma_R) vel_avg_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -2220,7 +2220,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2284,7 +2284,7 @@ contains pcorr = 0._wp end if - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2303,7 +2303,7 @@ contains ! Include p_tilde - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -2335,7 +2335,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*s_S ! Volume fraction flux - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2345,7 +2345,7 @@ contains end do ! Source for volume fraction advection equation - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & xi_M*(vel_L(dir_idx(i)) + & @@ -2361,7 +2361,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) ! Add advection flux for bubble variables - $:LOOP() + $:GPU_LOOP() do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2390,7 +2390,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - $:LOOP() + $:GPU_LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2407,7 +2407,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2415,7 +2415,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:LOOP() + $:GPU_LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2441,7 +2441,7 @@ contains !$acc end parallel loop else ! 5-EQUATION MODEL WITH HLLC - $:PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & "alpha_L", "alpha_R", "s_L", "s_R", "s_S", & "vel_avg_rms", "pcorr", "zcoef", "vel_L_tmp", & @@ -2456,14 +2456,14 @@ contains !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2490,32 +2490,32 @@ contains ! Change this by splitting it into the cases ! present in the bubbles_euler if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2529,13 +2529,13 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -2545,13 +2545,13 @@ contains end do - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -2563,7 +2563,7 @@ contains if (chemistry) then c_sum_Yi_Phi = 0.0_wp - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2622,19 +2622,19 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0_wp G_R = 0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -2651,14 +2651,14 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0_wp G_R = 0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -2669,7 +2669,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:LOOP() + $:GPU_LOOP() do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -2693,7 +2693,7 @@ contains vel_avg_rms, c_sum_Yi_Phi, c_avg) if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2769,7 +2769,7 @@ contains ! COMPUTING THE HLLC FLUXES ! MASS FLUX. - $:LOOP() + $:GPU_LOOP() do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2780,7 +2780,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & @@ -2815,7 +2815,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0_wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -2834,7 +2834,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -2843,7 +2843,7 @@ contains end if ! VOLUME FRACTION FLUX. - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2853,7 +2853,7 @@ contains end do ! VOLUME FRACTION SOURCE FLUX. - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -2876,7 +2876,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -2889,7 +2889,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) if (chemistry) then - $:LOOP() + $:GPU_LOOP() do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2904,7 +2904,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:LOOP() + $:GPU_LOOP() do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2921,7 +2921,7 @@ contains (1._wp - dir_flg(idx1))* & vel_R(idx1)) - vel_R(idx1)))) ! Geometrical source of the void fraction(s) is zero - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2929,7 +2929,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:LOOP() + $:GPU_LOOP() do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -3066,7 +3066,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & "alpha_rho_R", "vel", "alpha_L", "alpha_R", "rho", "pres", & "E", "H_no_mag", "gamma", "pi_inf", "qv", "vel_rms", "B", & "c", "c_fast", "pres_mag", "U_L", "U_R", "U_starL", & @@ -3121,7 +3121,7 @@ contains ! Sum properties of all fluid components rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho%L = rho%L + alpha_rho_L(i) gamma%L = gamma%L + alpha_L(i)*gammas(i) @@ -3291,7 +3291,7 @@ contains ! Energy flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) ! Partial fraction - $:LOOP() + $:GPU_LOOP() do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do @@ -3323,7 +3323,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:UPDATE(device=["Gs"]) + $:GPU_UPDATE(device=["Gs"]) if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) @@ -3335,7 +3335,7 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:UPDATE(device=["Res","Re_idx","Re_size"]) + $:GPU_UPDATE(device=["Res","Re_idx","Re_size"]) end if !$acc enter data copyin(is1, is2, is3, isx, isy, isz) @@ -3481,7 +3481,7 @@ contains dir_idx = (/3, 1, 2/); dir_flg = (/0._wp, 0._wp, 1._wp/) end if - $:UPDATE(device=["is1","is2","is3"]) + $:GPU_UPDATE(device=["is1","is2","is3"]) if (elasticity) then if (norm_dir == 1) then @@ -3495,15 +3495,15 @@ contains isx = ix; isy = iy; isz = iz ! for stuff in the same module - $:UPDATE(device=["isx","isy","isz"]) + $:GPU_UPDATE(device=["isx","isy","isz"]) ! for stuff in different modules - $:UPDATE(device=["dir_idx","dir_flg","dir_idx_tau"]) + $:GPU_UPDATE(device=["dir_idx","dir_flg","dir_idx_tau"]) ! Population of Buffers in x-direction if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3514,7 +3514,7 @@ contains end do if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3526,7 +3526,7 @@ contains end do if (n > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3538,7 +3538,7 @@ contains end do if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3558,7 +3558,7 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3570,7 +3570,7 @@ contains if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3582,7 +3582,7 @@ contains end do if (n > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3594,7 +3594,7 @@ contains end do if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -3617,7 +3617,7 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3629,7 +3629,7 @@ contains if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3639,7 +3639,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3650,7 +3650,7 @@ contains end do if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3667,7 +3667,7 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3679,7 +3679,7 @@ contains if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3689,7 +3689,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3700,7 +3700,7 @@ contains end do if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -3720,7 +3720,7 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3731,7 +3731,7 @@ contains end do if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3740,7 +3740,7 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3749,7 +3749,7 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3764,7 +3764,7 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3775,7 +3775,7 @@ contains end do if (viscous) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3785,7 +3785,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3795,7 +3795,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -3847,7 +3847,7 @@ contains if (viscous .or. (surface_tension)) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3861,7 +3861,7 @@ contains if (qbmm) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3877,7 +3877,7 @@ contains elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -3890,7 +3890,7 @@ contains end if if (qbmm) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3906,7 +3906,7 @@ contains else if (viscous .or. (surface_tension)) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -3919,7 +3919,7 @@ contains end if if (qbmm) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -3985,7 +3985,7 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - $:PARALLEL_LOOP(collapse=3, private=["idx_rp", "avg_v_int", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["idx_rp", "avg_v_int", & "avg_dvdx_int", "avg_dvdy_int", "avg_dvdz_int", "Re_s", "Re_b", & "vel_src_int", "r_eff", "divergence_cyl", "stress_vector_shear", & "stress_normal_bulk", "div_v_term_const"]) @@ -3999,7 +3999,7 @@ contains ! Average velocities and their derivatives at the interface ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:LOOP() + $:GPU_LOOP() do i_vel = 1, num_dims avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) @@ -4077,7 +4077,7 @@ contains end if end select - $:LOOP() + $:GPU_LOOP() do i_vel = 1, num_dims flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) @@ -4148,7 +4148,7 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - $:PARALLEL_LOOP(collapse=3, private=["idx_right_phys", "vel_grad_avg", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["idx_right_phys", "vel_grad_avg", & "current_tau_shear", "current_tau_bulk", "vel_src_at_interface", & "Re_shear", "Re_bulk", "divergence_v", "i_dim", "vel_comp_idx"]) do l_loop = isz%beg, isz%end @@ -4242,7 +4242,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction). pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) - $:ROUTINE() + $:GPU_ROUTINE() implicit none @@ -4276,7 +4276,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction). pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) - $:ROUTINE() + $:GPU_ROUTINE() implicit none @@ -4316,7 +4316,7 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4329,7 +4329,7 @@ contains end do if (cyl_coord) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4342,7 +4342,7 @@ contains end do end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4353,7 +4353,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4368,7 +4368,7 @@ contains end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4381,7 +4381,7 @@ contains end do end do if (grid_geometry == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4395,7 +4395,7 @@ contains end do end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end @@ -4406,7 +4406,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4420,7 +4420,7 @@ contains end if elseif (norm_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4432,7 +4432,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -4443,7 +4443,7 @@ contains end do if (riemann_solver == 1 .or. riemann_solver == 4) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 5dc7ee30a7..2ff31ea283 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -34,7 +34,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_enthalpy #else - $:ROUTINE() + $:GPU_ROUTINE() #endif type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf @@ -49,7 +49,7 @@ contains integer :: i - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho(i) = q_prim_vf(i)%sf(j, k, l) alpha(i) = q_prim_vf(E_idx + i)%sf(j, k, l) @@ -64,13 +64,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re) end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel(i) = q_prim_vf(contxe + i)%sf(j, k, l) end do vel_sum = 0._wp - $:LOOP() + $:GPU_LOOP() do i = 1, num_vels vel_sum = vel_sum + vel(i)**2._wp end do @@ -99,7 +99,7 @@ contains !! @param vcfl_sf (optional) cell centered viscous cfl number !! @param Rc_sf (optional) cell centered Rc pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf @@ -196,7 +196,7 @@ contains !! @param k y coordinate !! @param l z coordinate pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) - $:ROUTINE() + $:GPU_ROUTINE() real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt @@ -275,17 +275,17 @@ contains bc_type(1, -1)%sf(:, :, :) = bc_x%beg bc_type(1, 1)%sf(:, :, :) = bc_x%end - $:UPDATE(device=["bc_type(1,-1)%sf","bc_type(1,1)%sf"]) + $:GPU_UPDATE(device=["bc_type(1,-1)%sf","bc_type(1,1)%sf"]) if (n > 0) then bc_type(2, -1)%sf(:, :, :) = bc_y%beg bc_type(2, 1)%sf(:, :, :) = bc_y%end - $:UPDATE(device=["bc_type(2,-1)%sf","bc_type(2,1)%sf"]) + $:GPU_UPDATE(device=["bc_type(2,-1)%sf","bc_type(2,1)%sf"]) if (p > 0) then bc_type(3, -1)%sf(:, :, :) = bc_z%beg bc_type(3, 1)%sf(:, :, :) = bc_z%end - $:UPDATE(device=["bc_type(3,-1)%sf","bc_type(3,1)%sf"]) + $:GPU_UPDATE(device=["bc_type(3,-1)%sf","bc_type(3,1)%sf"]) end if end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index bd4ea50666..981c9f8020 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1263,12 +1263,12 @@ contains if (cfl_dt) then if ((mytime + dt) >= t_stop) then dt = t_stop - mytime - $:UPDATE(device=["dt"]) + $:GPU_UPDATE(device=["dt"]) end if else if ((mytime + dt) >= finaltime) then dt = finaltime - mytime - $:UPDATE(device=["dt"]) + $:GPU_UPDATE(device=["dt"]) end if end if @@ -1292,7 +1292,7 @@ contains if (probe_wrt) then do i = 1, sys_size - $:UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) end do end if @@ -1395,7 +1395,7 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") do i = 1, sys_size - $:UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) do l = 0, p do k = 0, n do j = 0, m @@ -1409,8 +1409,8 @@ contains end do if (qbmm .and. .not. polytropic) then - $:UPDATE(host=["pb_ts(1)%sf"]) - $:UPDATE(host=["mv_ts(1)%sf"]) + $:GPU_UPDATE(host=["pb_ts(1)%sf"]) + $:GPU_UPDATE(host=["mv_ts(1)%sf"]) end if if (cfl_dt) then @@ -1420,16 +1420,16 @@ contains end if if (bubbles_lagrange) then - $:UPDATE(host=["intfc_rad"]) + $:GPU_UPDATE(host=["intfc_rad"]) do i = 1, nBubs if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") end if end do - $:UPDATE(host=["q_beta%vf(1)%sf"]) + $:GPU_UPDATE(host=["q_beta%vf(1)%sf"]) call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1)) - $:UPDATE(host=["Rmax_stats","Rmin_stats","gas_p","gas_mv","intfc_vel"]) + $:GPU_UPDATE(host=["Rmax_stats","Rmin_stats","gas_p","gas_mv","intfc_vel"]) call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else @@ -1626,44 +1626,44 @@ contains integer :: i !Update GPU DATA do i = 1, sys_size - $:UPDATE(device=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(device=["q_cons_ts(1)%vf(i)%sf"]) end do if (qbmm .and. .not. polytropic) then - $:UPDATE(device=["pb_ts(1)%sf","mv_ts(1)%sf"]) + $:GPU_UPDATE(device=["pb_ts(1)%sf","mv_ts(1)%sf"]) end if if (chemistry) then - $:UPDATE(device=["q_T_sf%sf"]) + $:GPU_UPDATE(device=["q_T_sf%sf"]) end if - $:UPDATE(device=["nb","R0ref","Ca","Web","Re_inv","weight","R0","V0", & + $:GPU_UPDATE(device=["nb","R0ref","Ca","Web","Re_inv","weight","R0","V0", & & "bubbles_euler","polytropic","polydisperse","qbmm","R0_type", & & "ptil","bubble_model","thermal","poly_sigma","adv_n","adap_dt", & & "adap_dt_tol","n_idx","pi_fac","low_Mach"]) - $:UPDATE(device=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv","M_n", & + $:GPU_UPDATE(device=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv","M_n", & & "M_v","k_n","k_v","pb0","mass_n0","mass_v0","Pe_T","Re_trans_T", & & "Re_trans_c","Im_trans_T","Im_trans_c","omegaN","mul0","ss", & & "gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) - $:UPDATE(device=["acoustic_source", "num_source"]) - $:UPDATE(device=["sigma", "surface_tension"]) + $:GPU_UPDATE(device=["acoustic_source", "num_source"]) + $:GPU_UPDATE(device=["sigma", "surface_tension"]) - $:UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) + $:GPU_UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) - $:UPDATE(device=["bc_x%vb1","bc_x%vb2","bc_x%vb3","bc_x%ve1","bc_x%ve2","bc_x%ve3"]) - $:UPDATE(device=["bc_y%vb1","bc_y%vb2","bc_y%vb3","bc_y%ve1","bc_y%ve2","bc_y%ve3"]) - $:UPDATE(device=["bc_z%vb1","bc_z%vb2","bc_z%vb3","bc_z%ve1","bc_z%ve2","bc_z%ve3"]) + $:GPU_UPDATE(device=["bc_x%vb1","bc_x%vb2","bc_x%vb3","bc_x%ve1","bc_x%ve2","bc_x%ve3"]) + $:GPU_UPDATE(device=["bc_y%vb1","bc_y%vb2","bc_y%vb3","bc_y%ve1","bc_y%ve2","bc_y%ve3"]) + $:GPU_UPDATE(device=["bc_z%vb1","bc_z%vb2","bc_z%vb3","bc_z%ve1","bc_z%ve2","bc_z%ve3"]) - $:UPDATE(device=["bc_x%grcbc_in","bc_x%grcbc_out","bc_x%grcbc_vel_out"]) - $:UPDATE(device=["bc_y%grcbc_in","bc_y%grcbc_out","bc_y%grcbc_vel_out"]) - $:UPDATE(device=["bc_z%grcbc_in","bc_z%grcbc_out","bc_z%grcbc_vel_out"]) + $:GPU_UPDATE(device=["bc_x%grcbc_in","bc_x%grcbc_out","bc_x%grcbc_vel_out"]) + $:GPU_UPDATE(device=["bc_y%grcbc_in","bc_y%grcbc_out","bc_y%grcbc_vel_out"]) + $:GPU_UPDATE(device=["bc_z%grcbc_in","bc_z%grcbc_out","bc_z%grcbc_vel_out"]) - $:UPDATE(device=["relax", "relax_model"]) + $:GPU_UPDATE(device=["relax", "relax_model"]) if (relax) then - $:UPDATE(device=["palpha_eps", "ptgalpha_eps"]) + $:GPU_UPDATE(device=["palpha_eps", "ptgalpha_eps"]) end if if (ib) then - $:UPDATE(device=["ib_markers%sf"]) + $:GPU_UPDATE(device=["ib_markers%sf"]) end if end subroutine s_initialize_gpu_vars diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 6631933e2b..4433fc9926 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -30,16 +30,16 @@ module m_surface_tension !> @{ type(scalar_field), allocatable, dimension(:) :: c_divs !> @) - $:DECLARE(create=["c_divs"]) + $:GPU_DECLARE(create=["c_divs"]) !> @name cell boundary reconstructed gradient components and magnitude !> @{ real(wp), allocatable, dimension(:, :, :, :) :: gL_x, gR_x, gL_y, gR_y, gL_z, gR_z !> @} - $:DECLARE(create=["gL_x","gR_x","gL_y","gR_y","gL_z","gR_z"]) + $:GPU_DECLARE(create=["gL_x","gR_x","gL_y","gR_y","gL_z","gR_z"]) type(int_bounds_info) :: is1, is2, is3, iv - $:DECLARE(create=["is1","is2","is3","iv"]) + $:GPU_DECLARE(create=["is1","is2","is3","iv"]) contains @@ -86,7 +86,7 @@ contains integer :: j, k, l, i if (id == 1) then - $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", & "normWR", "normW"]) do l = isz%beg, isz%end @@ -133,7 +133,7 @@ contains elseif (id == 2) then - $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & "normW"]) do l = isz%beg, isz%end @@ -180,7 +180,7 @@ contains elseif (id == 3) then - $:PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & "normW"]) do l = isz%beg, isz%end @@ -244,7 +244,7 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -254,7 +254,7 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -265,7 +265,7 @@ contains end do if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -276,7 +276,7 @@ contains end do end if - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -336,10 +336,10 @@ contains end if - $:UPDATE(device=["is1","is2","is3","iv"]) + $:GPU_UPDATE(device=["is1","is2","is3","iv"]) if (recon_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -352,7 +352,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -365,7 +365,7 @@ contains end do !$acc end parallel loop else if (recon_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b9d2594a05..4d9e28e6a5 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -78,7 +78,7 @@ module m_time_steppers integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme - $:DECLARE(create=["q_cons_ts","q_prim_vf","q_T_sf","rhs_vf","q_prim_ts","rhs_mv","rhs_pb","max_dt"]) + $:GPU_DECLARE(create=["q_cons_ts","q_prim_vf","q_T_sf","rhs_vf","q_prim_ts","rhs_mv","rhs_pb","max_dt"]) contains @@ -380,7 +380,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -395,7 +395,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -412,7 +412,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -482,7 +482,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -497,7 +497,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -514,7 +514,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -554,7 +554,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -569,7 +569,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -587,7 +587,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -664,7 +664,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -679,7 +679,7 @@ contains !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -696,7 +696,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -736,7 +736,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -751,7 +751,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -769,7 +769,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -809,7 +809,7 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -824,7 +824,7 @@ contains end do if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -842,7 +842,7 @@ contains end if if (qbmm .and. (.not. polytropic)) then - $:PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -949,7 +949,7 @@ contains if (stage == 3) then if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) call s_write_lag_particles(mytime) end if call s_write_void_evol(mytime) @@ -982,7 +982,7 @@ contains q_prim_vf, & idwint) - $:PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) do l = 0, p do k = 0, n do j = 0, m @@ -1006,7 +1006,7 @@ contains call s_mpi_allreduce_min(dt_local, dt) end if - $:UPDATE(device=["dt"]) + $:GPU_UPDATE(device=["dt"]) end subroutine s_compute_dt @@ -1025,7 +1025,7 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -1051,7 +1051,7 @@ contains integer :: i !< Generic loop iterator do i = 1, sys_size - $:UPDATE(host=["q_prim_vf(i)%sf"]) + $:GPU_UPDATE(host=["q_prim_vf(i)%sf"]) end do if (t_step == t_step_start) then diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 2cc8797e27..ba41303c22 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -25,10 +25,10 @@ module m_viscous type(int_bounds_info) :: iv type(int_bounds_info) :: is1_viscous, is2_viscous, is3_viscous - $:DECLARE(create=["is1_viscous","is2_viscous","is3_viscous","iv"]) + $:GPU_DECLARE(create=["is1_viscous","is2_viscous","is3_viscous","iv"]) real(wp), allocatable, dimension(:, :) :: Res_viscous - $:DECLARE(create=["Res_viscous"]) + $:GPU_DECLARE(create=["Res_viscous"]) contains @@ -43,7 +43,7 @@ contains Res_viscous(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:UPDATE(device=["Res_viscous","Re_idx","Re_size"]) + $:GPU_UPDATE(device=["Res_viscous","Re_idx","Re_size"]) !$acc enter data copyin(is1_viscous, is2_viscous, is3_viscous, iv) end subroutine s_initialize_viscous_module @@ -76,13 +76,13 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = momxb, E_idx tau_Re_vf(i)%sf(j, k, l) = 0._wp end do @@ -90,13 +90,13 @@ contains end do end do if (shear_stress) then ! Shear stresses - $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -112,14 +112,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -138,7 +138,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -149,7 +149,7 @@ contains end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -157,12 +157,12 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -182,7 +182,7 @@ contains - 2._wp*grad_x_vf(1)%sf(j, k, l) & - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & (3._wp*Re_visc(1)) - $:LOOP() + $:GPU_LOOP() do i = 1, 2 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -198,13 +198,13 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -220,14 +220,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -246,7 +246,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -257,7 +257,7 @@ contains end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -265,12 +265,12 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -303,13 +303,13 @@ contains if (p == 0) return if (shear_stress) then ! Shear stresses - $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -325,14 +325,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -351,7 +351,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -362,7 +362,7 @@ contains end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -370,12 +370,12 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -395,7 +395,7 @@ contains y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & Re_visc(1) - $:LOOP() + $:GPU_LOOP() do i = 2, 3 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -412,13 +412,13 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:PARALLEL_LOOP(collapse=3, private=["alpha_visc", & + $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & "alpha_rho_visc", "Re_visc", "tau_Re"]) do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -434,14 +434,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -460,7 +460,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -471,7 +471,7 @@ contains end if - $:LOOP() + $:GPU_LOOP() do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -479,12 +479,12 @@ contains end do if (viscous) then - $:LOOP() + $:GPU_LOOP() do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:LOOP() + $:GPU_LOOP() do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -550,7 +550,7 @@ contains iv%beg = mom_idx%beg; iv%end = mom_idx%end - $:UPDATE(device=["iv"]) + $:GPU_UPDATE(device=["iv"]) call s_reconstruct_cell_boundary_values_visc( & q_prim_qp%vf(iv%beg:iv%end), & @@ -588,17 +588,17 @@ contains else ! Compute velocity gradient at cell centers using finite differences iv%beg = mom_idx%beg; iv%end = mom_idx%end - $:UPDATE(device=["iv"]) + $:GPU_UPDATE(device=["iv"]) is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j, k, l) - & @@ -609,11 +609,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j + 1, k, l) - & @@ -626,11 +626,11 @@ contains if (n > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j, l) - & @@ -641,11 +641,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j + 1, l) - & @@ -656,11 +656,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & @@ -675,11 +675,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & @@ -695,11 +695,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & @@ -715,11 +715,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & @@ -737,11 +737,11 @@ contains if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -753,11 +753,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -769,11 +769,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -790,11 +790,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -811,11 +811,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -832,11 +832,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -853,11 +853,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -874,11 +874,11 @@ contains end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -894,11 +894,11 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & @@ -914,11 +914,11 @@ contains end do end do end do - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & @@ -1024,7 +1024,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1036,7 +1036,7 @@ contains end do end do elseif (norm_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1048,7 +1048,7 @@ contains end do end do elseif (norm_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1123,7 +1123,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1135,7 +1135,7 @@ contains end do end do elseif (norm_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1147,7 +1147,7 @@ contains end do end do elseif (norm_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1211,11 +1211,11 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(j)) & @@ -1239,11 +1239,11 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(k)) & @@ -1267,11 +1267,11 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - $:LOOP() + $:GPU_LOOP() do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(l)) & @@ -1320,9 +1320,9 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1334,7 +1334,7 @@ contains end do if (n > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1347,7 +1347,7 @@ contains end if if (p > 0) then - $:PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1359,7 +1359,7 @@ contains end do end if - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(idwbuff(1)%beg, k, l) = & @@ -1371,7 +1371,7 @@ contains end do end do if (n > 0) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, idwbuff(2)%beg, l) = & @@ -1383,7 +1383,7 @@ contains end do end do if (p > 0) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, idwbuff(3)%beg) = & @@ -1398,7 +1398,7 @@ contains end if if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & @@ -1407,7 +1407,7 @@ contains end do end if if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & @@ -1417,7 +1417,7 @@ contains end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & @@ -1426,7 +1426,7 @@ contains end do end if if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & @@ -1436,7 +1436,7 @@ contains end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, 0) = & @@ -1446,7 +1446,7 @@ contains end do end if if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then - $:PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, p) = & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 17e0583687..098a2b1836 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -43,7 +43,7 @@ module m_weno !> @{ real(wp), allocatable, dimension(:, :, :, :) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z !> @} - $:DECLARE(create=["v_rs_ws_x","v_rs_ws_y","v_rs_ws_z"]) + $:GPU_DECLARE(create=["v_rs_ws_x","v_rs_ws_y","v_rs_ws_z"]) ! WENO Coefficients @@ -60,8 +60,8 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_y real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_z !> @} - $:DECLARE(create=["poly_coef_cbL_x","poly_coef_cbL_y","poly_coef_cbL_z"]) - $:DECLARE(create=["poly_coef_cbR_x","poly_coef_cbR_y","poly_coef_cbR_z"]) + $:GPU_DECLARE(create=["poly_coef_cbL_x","poly_coef_cbL_y","poly_coef_cbL_z"]) + $:GPU_DECLARE(create=["poly_coef_cbR_x","poly_coef_cbR_y","poly_coef_cbR_z"]) !> @name The ideal weights at the left and the right cell-boundaries and at the !! left and the right quadrature points, in x-, y- and z-directions. Note @@ -76,7 +76,7 @@ module m_weno real(wp), target, allocatable, dimension(:, :) :: d_cbR_y real(wp), target, allocatable, dimension(:, :) :: d_cbR_z !> @} - $:DECLARE(create=["d_cbL_x","d_cbL_y","d_cbL_z","d_cbR_x","d_cbR_y","d_cbR_z"]) + $:GPU_DECLARE(create=["d_cbL_x","d_cbL_y","d_cbL_z","d_cbR_x","d_cbR_y","d_cbR_z"]) !> @name Smoothness indicator coefficients in the x-, y-, and z-directions. Note !! that the first array dimension identifies the smoothness indicator, the @@ -87,22 +87,22 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_y real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_z !> @} - $:DECLARE(create=["beta_coef_x","beta_coef_y","beta_coef_z"]) + $:GPU_DECLARE(create=["beta_coef_x","beta_coef_y","beta_coef_z"]) ! END: WENO Coefficients integer :: v_size !< Number of WENO-reconstructed cell-average variables - $:DECLARE(create=["v_size"]) + $:GPU_DECLARE(create=["v_size"]) !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ type(int_bounds_info) :: is1_weno, is2_weno, is3_weno - $:DECLARE(create=["is1_weno","is2_weno","is3_weno"]) + $:GPU_DECLARE(create=["is1_weno","is2_weno","is3_weno"]) ! !> @} real(wp) :: test - $:DECLARE(create=["test"]) + $:GPU_DECLARE(create=["test"]) contains @@ -625,11 +625,11 @@ contains #:endfor if (weno_dir == 1) then - $:UPDATE(device=["poly_coef_cbL_x","poly_coef_cbR_x","d_cbL_x","d_cbR_x","beta_coef_x"]) + $:GPU_UPDATE(device=["poly_coef_cbL_x","poly_coef_cbR_x","d_cbL_x","d_cbR_x","beta_coef_x"]) elseif (weno_dir == 2) then - $:UPDATE(device=["poly_coef_cbL_y","poly_coef_cbR_y","d_cbL_y","d_cbR_y","beta_coef_y"]) + $:GPU_UPDATE(device=["poly_coef_cbL_y","poly_coef_cbR_y","d_cbL_y","d_cbR_y","beta_coef_y"]) else - $:UPDATE(device=["poly_coef_cbL_z","poly_coef_cbR_z","d_cbL_z","d_cbR_z","beta_coef_z"]) + $:GPU_UPDATE(device=["poly_coef_cbL_z","poly_coef_cbR_z","d_cbL_z","d_cbR_z","beta_coef_z"]) end if ! Nullifying WENO coefficients and cell-boundary locations pointers @@ -663,7 +663,7 @@ contains is2_weno = is2_weno_d is3_weno = is3_weno_d - $:UPDATE(device=["is1_weno","is2_weno","is3_weno"]) + $:GPU_UPDATE(device=["is1_weno","is2_weno","is3_weno"]) if (weno_order /= 1) then call s_initialize_weno(v_vf, & @@ -672,7 +672,7 @@ contains if (weno_order == 1) then if (weno_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -685,7 +685,7 @@ contains end do !$acc end parallel loop else if (weno_dir == 2) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -698,7 +698,7 @@ contains end do !$acc end parallel loop else if (weno_dir == 3) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -714,7 +714,7 @@ contains elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:PARALLEL_LOOP(collapse=4,private=["beta","dvd","poly","omega","alpha","tau"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["beta","dvd","poly","omega","alpha","tau"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -793,11 +793,11 @@ contains elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:PARALLEL_LOOP(collapse=3,private=["dvd","poly","beta","alpha","omega","tau","delta"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["dvd","poly","beta","alpha","omega","tau","delta"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - $:LOOP() + $:GPU_LOOP() do i = 1, v_size ! reconstruct from left side @@ -913,11 +913,11 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:PARALLEL_LOOP(collapse=3,private=["poly","beta","alpha","omega","tau","delta","dvd","v"]) + $:GPU_PARALLEL_LOOP(collapse=3,private=["poly","beta","alpha","omega","tau","delta","dvd","v"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - $:LOOP() + $:GPU_LOOP() do i = 1, v_size if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity @@ -1133,10 +1133,10 @@ contains ! as to reshape the inputted data in the coordinate direction of ! the WENO reconstruction v_size = ubound(v_vf, 1) - $:UPDATE(device=["v_size"]) + $:GPU_UPDATE(device=["v_size"]) if (weno_dir == 1) then - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1174,7 +1174,7 @@ contains end if else #endif - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1204,7 +1204,7 @@ contains end block else #endif - $:PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1267,7 +1267,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - $:PARALLEL_LOOP(collapse=4,private=["d"]) + $:GPU_PARALLEL_LOOP(collapse=4,private=["d"]) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end From 444c4d6fd37f5a633fe87752476d83ee0a595d77 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 19:34:14 -0400 Subject: [PATCH 42/75] Replace enter data directives and add cray inline support --- src/common/include/directive_macros.fpp | 17 +++++++++++++-- src/common/m_variables_conversion.fpp | 2 +- src/simulation/m_fftw.fpp | 2 +- src/simulation/m_global_parameters.fpp | 15 ++++++++----- src/simulation/m_ibm.fpp | 4 ++-- src/simulation/m_qbmm.fpp | 2 +- src/simulation/m_rhs.fpp | 28 ++++++++++++------------- src/simulation/m_riemann_solvers.fpp | 2 +- src/simulation/m_viscous.fpp | 2 +- 9 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/common/include/directive_macros.fpp b/src/common/include/directive_macros.fpp index 0ca82b305c..de4f20cafe 100644 --- a/src/common/include/directive_macros.fpp +++ b/src/common/include/directive_macros.fpp @@ -215,7 +215,8 @@ $:acc_directive #:enddef -#:def GPU_ROUTINE(parallelism=['seq'], nohost=False, extraAccArgs=None) +#:def GPU_ROUTINE(function_name=None, parallelism=['seq'], nohost=False, cray_inline=False, extraAccArgs=None) + #:assert isinstance(cray_inline, bool) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:assert isinstance(nohost, bool) @@ -230,7 +231,19 @@ #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') #:set acc_directive = '!$acc routine ' + & & clause_val + extraAccArgs_val.strip('\n') - $:acc_directive + #:if cray_inline == True + #:if not isinstance(function_name, str) + #:stop "When inlining for Cray Compiler, function name must be given and given as a string" + #:endif +#ifdef _CRAYFTN + #:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n') + $:cray_directive +#else + $:acc_directive +#endif + #:else + $:acc_directive + #:endif #:enddef #:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index f7d5e9075a..eb18cdd417 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -611,7 +611,7 @@ contains integer :: i, j -!$acc enter data copyin(is1b, is1e, is2b, is2e, is3b, is3e) +$:GPU_ENTER_DATA(copyin=["is1b","is1e","is2b","is2e","is3b","is3e"]) #ifdef MFC_SIMULATION @:ALLOCATE(gammas (1:num_fluids)) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index b0614f880a..e26674edd2 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -90,7 +90,7 @@ contains gpu_fft_size(1) = real_size; iembed(1) = 0 oembed(1) = 0 - !$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq) + $:GPU_ENTER_DATA(copyin=["real_size","cmplx_size","x_size","sys_size","batch_size","Nfq"]) $:GPU_UPDATE(device=["real_size","cmplx_size","x_size","sys_size","batch_size"]) #else ! Allocate input and output DFT data sizes diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index a91b2d49b6..43674e61e0 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -1257,11 +1257,16 @@ contains $:GPU_UPDATE(device=["mhd", "relativity"]) #:endif - !$acc enter data copyin(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles_euler, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma) - !$acc enter data copyin(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN, mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam) - !$acc enter data copyin(dir_idx, dir_flg, dir_idx_tau) - - !$acc enter data copyin(relax, relax_model, palpha_eps,ptgalpha_eps) + $:GPU_ENTER_DATA(copyin=["nb","R0ref","Ca","Web","Re_inv","weight","R0", & + & "V0","bubbles_euler","polytropic","polydisperse","qbmm","R0_type", & + & "ptil","bubble_model","thermal","poly_sigma"]) + $:GPU_ENTER_DATA(copyin=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv", & + & "M_n","M_v","k_n","k_v","pb0","mass_n0","mass_v0","Pe_T", & + & "Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN", & + & "mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) + $:GPU_ENTER_DATA(copyin=["dir_idx","dir_flg","dir_idx_tau"]) + + $:GPU_ENTER_DATA(copyin=["relax","relax_model","palpha_eps","ptgalpha_eps"]) ! Allocating grid variables for the x-, y- and z-directions @:ALLOCATE(x_cb(-1 - buff_size:m + buff_size)) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index bc9a9e401e..b1d47a65e3 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -71,7 +71,7 @@ contains @:ACC_SETUP_SFs(levelset) @:ACC_SETUP_SFs(levelset_norm) - !$acc enter data copyin(num_gps, num_inner_gps) + $:GPU_ENTER_DATA(copyin=["num_gps","num_inner_gps"]) end subroutine s_initialize_ibm_module @@ -96,7 +96,7 @@ contains @:ALLOCATE(ghost_points(1:num_gps)) @:ALLOCATE(inner_points(1:num_inner_gps)) - !$acc enter data copyin(ghost_points, inner_points) + $:GPU_ENTER_DATA(copyin=["ghost_points","inner_points"]) call s_find_ghost_points(ghost_points, inner_points) $:GPU_UPDATE(device=["ghost_points", "inner_points"]) diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 86ef374e1c..08476068b2 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -58,7 +58,7 @@ contains nterms = 7 end if - !$acc enter data copyin(nterms) + $:GPU_ENTER_DATA(copyin=["nterms"]) $:GPU_UPDATE(device=["nterms"]) #:endif diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index d87ac93811..dfba8ef638 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -174,7 +174,7 @@ contains integer :: num_eqns_after_adv - !$acc enter data copyin(idwbuff, idwbuff) + $:GPU_ENTER_DATA(copyin=["idwbuff","idwbuff"]) $:GPU_UPDATE(device=["idwbuff", "idwbuff"]) @:ALLOCATE(q_cons_qp%vf(1:sys_size)) @@ -202,29 +202,29 @@ contains @:ALLOCATE(q_prim_qp%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) else q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf - !$acc enter data copyin(q_prim_qp%vf(l)%sf) - !$acc enter data attach(q_prim_qp%vf(l)%sf) + $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(l)%sf"]) + $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(l)%sf"]) end if end do do l = adv_idx%beg, adv_idx%end q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf - !$acc enter data copyin(q_prim_qp%vf(l)%sf) - !$acc enter data attach(q_prim_qp%vf(l)%sf) + $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(l)%sf"]) + $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(l)%sf"]) end do if (surface_tension) then q_prim_qp%vf(c_idx)%sf => & q_cons_qp%vf(c_idx)%sf - !$acc enter data copyin(q_prim_qp%vf(c_idx)%sf) - !$acc enter data attach(q_prim_qp%vf(c_idx)%sf) + $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(c_idx)%sf"]) + $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(c_idx)%sf"]) end if if (cont_damage) then q_prim_qp%vf(damage_idx)%sf => & q_cons_qp%vf(damage_idx)%sf - !$acc enter data copyin(q_prim_qp%vf(damage_idx)%sf) - !$acc enter data attach(q_prim_qp%vf(damage_idx)%sf) + $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(damage_idx)%sf"]) + $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(damage_idx)%sf"]) end if if (viscous) then @@ -549,14 +549,14 @@ contains if (riemann_solver /= 1 .and. riemann_solver /= 4) then do l = adv_idx%beg + 1, adv_idx%end flux_src_n(i)%vf(l)%sf => flux_src_n(i)%vf(adv_idx%beg)%sf - !$acc enter data attach(flux_src_n(i)%vf(l)%sf) + $:GPU_ENTER_DATA(attach=["flux_src_n(i)%vf(l)%sf"]) end do end if else do l = 1, sys_size flux_n(i)%vf(l)%sf => flux_n(1)%vf(l)%sf flux_src_n(i)%vf(l)%sf => flux_src_n(1)%vf(l)%sf - !$acc enter data attach(flux_n(i)%vf(l)%sf,flux_src_n(i)%vf(l)%sf) + $:GPU_ENTER_DATA(attach=["flux_n(i)%vf(l)%sf","flux_src_n(i)%vf(l)%sf"]) end do end if end do @@ -1788,13 +1788,13 @@ contains @:DEALLOCATE(q_cons_qp%vf(j)%sf) @:DEALLOCATE(q_prim_qp%vf(j)%sf) else - !$acc exit data detach(q_prim_qp%vf(j)%sf) + $:GPU_EXIT_DATA(detach=["q_prim_qp%vf(j)%sf"]) nullify (q_prim_qp%vf(j)%sf) end if end do do j = adv_idx%beg, adv_idx%end - !$acc exit data detach(q_prim_qp%vf(j)%sf) + $:GPU_EXIT_DATA(detach=["q_prim_qp%vf(j)%sf"]) nullify (q_prim_qp%vf(j)%sf) end do @@ -1827,7 +1827,7 @@ contains end if if (mpp_lim .and. bubbles_euler) then - !$acc exit data delete(alf_sum%sf) + $:GPU_EXIT_DATA(delete=["alf_sum%sf"]) deallocate (alf_sum%sf) end if diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d81659c2b4..1140834845 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -3338,7 +3338,7 @@ contains $:GPU_UPDATE(device=["Res","Re_idx","Re_size"]) end if - !$acc enter data copyin(is1, is2, is3, isx, isy, isz) + $:GPU_ENTER_DATA(copyin=["is1","is2","is3","isx","isy","isz"]) is1%beg = -1; is2%beg = 0; is3%beg = 0 is1%end = m; is2%end = n; is3%end = p diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index ba41303c22..d2c6fedbdd 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -44,7 +44,7 @@ contains end do end do $:GPU_UPDATE(device=["Res_viscous","Re_idx","Re_size"]) - !$acc enter data copyin(is1_viscous, is2_viscous, is3_viscous, iv) + $:GPU_ENTER_DATA(copyin=["is1_viscous","is2_viscous","is3_viscous","iv"]) end subroutine s_initialize_viscous_module From 593ed0f2597c582f10a383d67a384ccf533cd464 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 19:45:01 -0400 Subject: [PATCH 43/75] Renamed gpu macros file to parallel_macros.fpp --- .../include/{directive_macros.fpp => parallel_macros.fpp} | 0 src/common/m_boundary_common.fpp | 2 +- src/common/m_chemistry.fpp | 2 +- src/common/m_finite_differences.fpp | 2 +- src/common/m_helper.fpp | 2 +- src/common/m_helper_basic.fpp | 2 +- src/common/m_mpi_common.fpp | 2 +- src/common/m_phase_change.fpp | 2 +- src/common/m_variables_conversion.fpp | 2 +- src/pre_process/m_assign_variables.fpp | 2 +- src/pre_process/m_patches.fpp | 2 +- src/simulation/m_acoustic_src.fpp | 2 +- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_boundary_conditions.fpp | 2 +- src/simulation/m_bubbles.fpp | 2 +- src/simulation/m_bubbles_EE.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_bubbles_EL_kernels.fpp | 2 +- src/simulation/m_cbc.fpp | 2 +- src/simulation/m_compute_cbc.fpp | 2 +- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_fftw.fpp | 2 +- src/simulation/m_global_parameters.fpp | 2 +- src/simulation/m_hyperelastic.fpp | 2 +- src/simulation/m_hypoelastic.fpp | 2 +- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_mhd.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 2 +- src/simulation/m_qbmm.fpp | 2 +- src/simulation/m_rhs.fpp | 2 +- src/simulation/m_riemann_solvers.fpp | 2 +- src/simulation/m_sim_helpers.fpp | 2 +- src/simulation/m_start_up.fpp | 2 +- src/simulation/m_surface_tension.fpp | 2 +- src/simulation/m_time_steppers.fpp | 2 +- src/simulation/m_viscous.fpp | 2 +- src/simulation/m_weno.fpp | 2 +- 37 files changed, 36 insertions(+), 36 deletions(-) rename src/common/include/{directive_macros.fpp => parallel_macros.fpp} (100%) diff --git a/src/common/include/directive_macros.fpp b/src/common/include/parallel_macros.fpp similarity index 100% rename from src/common/include/directive_macros.fpp rename to src/common/include/parallel_macros.fpp diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 5f9f8fa16e..ad0f2d81ff 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -6,7 +6,7 @@ !! boundary condiitons #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_boundary_common diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 4984a2248f..a094fe4651 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -4,7 +4,7 @@ !! @author Henry Le Berre #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' #:include 'case.fpp' module m_chemistry diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 8119c1257a..7b7ca73029 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -1,5 +1,5 @@ #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_finite_differences diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index cbcabc0a3f..83ac14675d 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -1,5 +1,5 @@ #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> !! @file m_helper.f90 !! @brief Contains module m_helper diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index b6afc36416..9b2d4007af 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -1,4 +1,4 @@ -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> !! @file m_helper_basic.f90 !! @brief Contains module m_helper_basic diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 6ea2bc60f2..b7a5c2368c 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1,6 +1,6 @@ #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 15f11be298..baf4a7601c 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -2,7 +2,7 @@ !> procedure. #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_phase_change diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index eb18cdd417..8db15cc5c9 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_variables_conversion #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' #:include 'case.fpp' !> @brief This module consists of subroutines used in the conversion of the diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index 0f77df808d..fe1a089fbb 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_assign_variables #:include 'case.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_assign_variables diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index 2dfefe6623..a9950a2ab9 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -7,7 +7,7 @@ #:include '2dHardcodedIC.fpp' #:include '3dHardcodedIC.fpp' #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_patches diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 3e1dea3e5e..3b6d1f6ad2 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_acoustic_src #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to create a acoustic source pressure source term module m_acoustic_src diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index d761141a0d..176c5e6fe9 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -1,5 +1,5 @@ #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_body_forces diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index f7408c43de..825fff5daf 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -2,7 +2,7 @@ ! @brief Contains module m_boundary_conditions !> @brief This module contains -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_boundary_conditions use m_derived_types diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 3d9341ef0a..c25db8b892 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_bubbles #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module contains the procedures shared by the ensemble-averaged and volume-averaged bubble models. module m_bubbles diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index c522af5e27..fc1bb62580 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_bubbles_EE #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module is used to compute the ensemble-averaged bubble dynamic variables module m_bubbles_EE diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 3240a9d9a8..4ab4a904da 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_bubbles_EL #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module is used to to compute the volume-averaged bubble model module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index aa660b2ed0..f6f9c70d4f 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_bubbles_EL_kernels #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module contains kernel functions used to map the effect of the lagrangian bubbles !! in the Eulerian framework. diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 3c587d9735..27a918f7cd 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -19,7 +19,7 @@ !! Please refer to Thompson (1987, 1990) for detailed descriptions. #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_cbc diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index d397d19cf5..51e404777e 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -1,4 +1,4 @@ -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> !! @file m_compute_cbc.f90 !! @brief CBC computation module diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index b1922c08fa..34965ea09e 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_data_output #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The primary purpose of this module is to output the grid and the !! conservative variables data at the chosen time-step interval. In diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index e26674edd2..24b8bb6188 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_fftw #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines for the FFT routines module m_fftw diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 43674e61e0..576d18ad15 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -4,7 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module contains all of the parameters describing the program !! logistics, the computational domain and the simulation algorithm. diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index dd232a7cd8..a00a7c808e 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_hyperelastic #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module consists of subroutines used in the calculation !! of the cauchy tensor diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 73f6383f69..e4105dac8c 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_hypoelastic #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module is used to compute source terms for hypoelastic model module m_hypoelastic diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index b1d47a65e3..10a9a25349 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_ibm #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module is used to handle all operations related to immersed !! boundary methods (IBMs) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 88f8621dbd..669363305d 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_mhd #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief This module is used to compute source terms for magnetohydrodynamics !! Note: not applicable for 1D diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index c8e943e8d9..d87c5df02c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -4,7 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 08476068b2..3521a71c00 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_qbmm #:include 'case.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' #:include 'macros.fpp' !> @brief This module is used to compute moment inversion via qbmm diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index dfba8ef638..7f73510a31 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -4,7 +4,7 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to calculate the right- !! hane-side (RHS) in the quasi-conservative, shock- and interface- diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 1140834845..706bb37730 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -21,7 +21,7 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' #:include 'inline_riemann.fpp' module m_riemann_solvers diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 2ff31ea283..da0aa370cb 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -1,4 +1,4 @@ -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' module m_sim_helpers use m_derived_types !< Definitions of the derived types diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 981c9f8020..a8134173ca 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_start_up #:include 'case.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The purpose of the module is primarily to read in the files that !! contain the inputs, the initial condition data and the grid data diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 4433fc9926..a29065c6da 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -1,5 +1,5 @@ #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' #:include 'inline_capillary.fpp' !> @brief This module is used to compute source terms for surface tension model diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 4d9e28e6a5..344b27d2c7 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_time_steppers #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The following module features a variety of time-stepping schemes. !! Currently, it includes the following Runge-Kutta (RK) algorithms: diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index d2c6fedbdd..77f939fb8b 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -2,7 +2,7 @@ !! @file m_viscous.f90 !! @brief Contains module m_viscous #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to compute viscous terms. module m_viscous diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 098a2b1836..7ff75df03d 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -2,7 +2,7 @@ !! @file m_weno.f90 !! @brief Contains module m_weno #:include 'macros.fpp' -#:include 'directive_macros.fpp' +#:include 'parallel_macros.fpp' !> @brief Weighted essentially non-oscillatory (WENO) reconstruction scheme !! that is supplemented with monotonicity preserving bounds (MPWENO) From 7d378bee2a0fe272fb8c0709bc915c392b9e4654 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 20:10:04 -0400 Subject: [PATCH 44/75] Ran formatter --- src/common/m_variables_conversion.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 8db15cc5c9..93b1c3e435 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -611,7 +611,7 @@ contains integer :: i, j -$:GPU_ENTER_DATA(copyin=["is1b","is1e","is2b","is2e","is3b","is3e"]) + $:GPU_ENTER_DATA(copyin=["is1b","is1e","is2b","is2e","is3b","is3e"]) #ifdef MFC_SIMULATION @:ALLOCATE(gammas (1:num_fluids)) From a8950143d892e54d779b025f5235375d5f93d937 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 20:29:34 -0400 Subject: [PATCH 45/75] Undo changes mpi_common as will be refactored in future pull request --- src/common/m_mpi_common.fpp | 69 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b7a5c2368c..609382d8a2 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1,6 +1,5 @@ #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, @@ -26,7 +25,7 @@ module m_mpi_common implicit none integer, private :: err_code, ierr, v_size !< - $:GPU_DECLARE(create=["v_size"]) + !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors real(wp), private, allocatable, dimension(:), target :: buff_send !< @@ -39,10 +38,10 @@ module m_mpi_common !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. - $:GPU_DECLARE(create=["buff_send","buff_recv"]) + !$acc declare create(buff_send, buff_recv) integer :: halo_size, nVars - $:GPU_DECLARE(create=["halo_size","nVars"]) + !$acc declare create(halo_size, nVars) contains @@ -639,7 +638,7 @@ contains #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") - $:GPU_UPDATE(device=["v_size"]) +!$acc update device(v_size) #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then @@ -693,7 +692,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -707,7 +706,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -722,7 +721,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -739,7 +738,7 @@ contains end if #endif #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size do l = 0, p do k = 0, buff_size - 1 @@ -755,7 +754,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -771,7 +770,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = 0, buff_size - 1 @@ -789,7 +788,7 @@ contains end if #endif #:else - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -805,7 +804,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -821,7 +820,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -856,7 +855,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - $:GPU_UPDATE(host=["buff_send"]) + !$acc update host(buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -874,7 +873,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - $:GPU_UPDATE(device=["buff_recv"]) + !$acc update device(buff_recv) call nvtxEndRange #:endif end if @@ -891,7 +890,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -912,7 +911,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -927,7 +926,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -944,7 +943,7 @@ contains end if #endif #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size do l = 0, p do k = -buff_size, -1 @@ -966,7 +965,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -982,7 +981,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = 0, p do k = -buff_size, -1 @@ -1001,7 +1000,7 @@ contains #endif #:else ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1024,7 +1023,7 @@ contains #ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1041,7 +1040,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=5,private=["r"]) + !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1089,7 +1088,7 @@ contains #ifdef MFC_MPI nVars = num_dims + 1 - $:GPU_UPDATE(device=["nVars"]) + !$acc update device(nVars) buffer_counts = (/ & buff_size*nVars*(n + 1)*(p + 1), & @@ -1131,7 +1130,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -1144,7 +1143,7 @@ contains end do #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, nVars do l = 0, p do k = 0, buff_size - 1 @@ -1159,7 +1158,7 @@ contains end do #:else - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, nVars do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -1187,7 +1186,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - $:GPU_UPDATE(host=["buff_send"]) + !$acc update host(buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -1205,7 +1204,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - $:GPU_UPDATE(device=["buff_recv"]) + !$acc update device(buff_recv) call nvtxEndRange #:endif end if @@ -1221,7 +1220,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1241,7 +1240,7 @@ contains end do #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, nVars do l = 0, p do k = -buff_size, -1 @@ -1263,7 +1262,7 @@ contains #:else ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=4,private=["r"]) + !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, nVars do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1301,4 +1300,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common +end module m_mpi_common \ No newline at end of file From f36163f915be9e20ef5f19cd8f2f596630595bd5 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 12 Jun 2025 20:32:24 -0400 Subject: [PATCH 46/75] Ran formatter --- src/common/m_mpi_common.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 609382d8a2..b439ce1c6d 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1300,4 +1300,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common \ No newline at end of file +end module m_mpi_common From 4c9f230e49247be1f8e16c3461a40c8750b25534 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 16 Jun 2025 11:32:18 -0400 Subject: [PATCH 47/75] Moved parallel_macros include directly into macros.fpp --- src/common/include/macros.fpp | 2 ++ src/common/m_boundary_common.fpp | 1 - src/common/m_chemistry.fpp | 1 - src/common/m_finite_differences.fpp | 1 - src/common/m_helper.fpp | 2 +- src/common/m_helper_basic.fpp | 3 ++- src/common/m_phase_change.fpp | 1 - src/common/m_variables_conversion.fpp | 1 - src/pre_process/m_assign_variables.fpp | 2 +- src/pre_process/m_patches.fpp | 1 - src/simulation/m_acoustic_src.fpp | 1 - src/simulation/m_body_forces.fpp | 1 - src/simulation/m_boundary_conditions.fpp | 3 ++- src/simulation/m_bubbles.fpp | 1 - src/simulation/m_bubbles_EE.fpp | 1 - src/simulation/m_bubbles_EL.fpp | 1 - src/simulation/m_bubbles_EL_kernels.fpp | 1 - src/simulation/m_cbc.fpp | 1 - src/simulation/m_compute_cbc.fpp | 3 ++- src/simulation/m_data_output.fpp | 1 - src/simulation/m_fftw.fpp | 1 - src/simulation/m_global_parameters.fpp | 1 - src/simulation/m_hyperelastic.fpp | 1 - src/simulation/m_hypoelastic.fpp | 1 - src/simulation/m_ibm.fpp | 1 - src/simulation/m_mhd.fpp | 1 - src/simulation/m_mpi_proxy.fpp | 1 - src/simulation/m_qbmm.fpp | 1 - src/simulation/m_rhs.fpp | 1 - src/simulation/m_riemann_solvers.fpp | 1 - src/simulation/m_sim_helpers.fpp | 3 ++- src/simulation/m_start_up.fpp | 2 +- src/simulation/m_surface_tension.fpp | 1 - src/simulation/m_time_steppers.fpp | 1 - src/simulation/m_viscous.fpp | 1 - src/simulation/m_weno.fpp | 1 - 36 files changed, 13 insertions(+), 35 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 679c276bf4..3a2bbf0b17 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -1,3 +1,5 @@ +#:include 'parallel_macros.fpp' + #:def LOG(expr) #ifdef MFC_DEBUG block diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index ad0f2d81ff..ce5b98a3fb 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -6,7 +6,6 @@ !! boundary condiitons #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_boundary_common diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index a094fe4651..3ff223844f 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -4,7 +4,6 @@ !! @author Henry Le Berre #:include 'macros.fpp' -#:include 'parallel_macros.fpp' #:include 'case.fpp' module m_chemistry diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index 7b7ca73029..c9bf9a4ad2 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -1,5 +1,4 @@ #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_finite_differences diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index 83ac14675d..44299f56d6 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -1,5 +1,5 @@ #:include 'macros.fpp' -#:include 'parallel_macros.fpp' + !> !! @file m_helper.f90 !! @brief Contains module m_helper diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 9b2d4007af..2ba77f0f15 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -1,8 +1,9 @@ -#:include 'parallel_macros.fpp' !> !! @file m_helper_basic.f90 !! @brief Contains module m_helper_basic +#:include 'macros.fpp' + module m_helper_basic use m_derived_types !< Definitions of the derived types diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index baf4a7601c..0a23205d58 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -2,7 +2,6 @@ !> procedure. #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_phase_change diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 93b1c3e435..4bc1426153 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_variables_conversion #:include 'macros.fpp' -#:include 'parallel_macros.fpp' #:include 'case.fpp' !> @brief This module consists of subroutines used in the conversion of the diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index fe1a089fbb..0774d43c88 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_assign_variables #:include 'case.fpp' -#:include 'parallel_macros.fpp' +#:include 'macros.fpp' module m_assign_variables diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index a9950a2ab9..f5245d392a 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -7,7 +7,6 @@ #:include '2dHardcodedIC.fpp' #:include '3dHardcodedIC.fpp' #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_patches diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 3b6d1f6ad2..2305aff227 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_acoustic_src #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to create a acoustic source pressure source term module m_acoustic_src diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 176c5e6fe9..794e7be075 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -1,5 +1,4 @@ #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_body_forces diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index 825fff5daf..5589ee23aa 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -2,7 +2,8 @@ ! @brief Contains module m_boundary_conditions !> @brief This module contains -#:include 'parallel_macros.fpp' +#:include 'macros.fpp' + module m_boundary_conditions use m_derived_types diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 1699cf8251..7cf2107522 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_bubbles #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module contains the procedures shared by the ensemble-averaged and volume-averaged bubble models. module m_bubbles diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index fc1bb62580..201a17d6ab 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_bubbles_EE #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module is used to compute the ensemble-averaged bubble dynamic variables module m_bubbles_EE diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4ab4a904da..0b9933ef8b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_bubbles_EL #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module is used to to compute the volume-averaged bubble model module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index f6f9c70d4f..d51f33589e 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_bubbles_EL_kernels #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module contains kernel functions used to map the effect of the lagrangian bubbles !! in the Eulerian framework. diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 27a918f7cd..07b859e085 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -19,7 +19,6 @@ !! Please refer to Thompson (1987, 1990) for detailed descriptions. #:include 'macros.fpp' -#:include 'parallel_macros.fpp' module m_cbc diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 51e404777e..fbe4b0d6b3 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -1,8 +1,9 @@ -#:include 'parallel_macros.fpp' !> !! @file m_compute_cbc.f90 !! @brief CBC computation module +#:include 'macros.fpp' + module m_compute_cbc use m_global_parameters implicit none diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 34965ea09e..2cec4cbe38 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_data_output #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The primary purpose of this module is to output the grid and the !! conservative variables data at the chosen time-step interval. In diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 24b8bb6188..4646f574a1 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_fftw #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines for the FFT routines module m_fftw diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 576d18ad15..b5a174e0e0 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -4,7 +4,6 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module contains all of the parameters describing the program !! logistics, the computational domain and the simulation algorithm. diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index a00a7c808e..1e8b475284 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_hyperelastic #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module consists of subroutines used in the calculation !! of the cauchy tensor diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index e4105dac8c..b2b5d7a859 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_hypoelastic #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module is used to compute source terms for hypoelastic model module m_hypoelastic diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 10a9a25349..a435d17454 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_ibm #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module is used to handle all operations related to immersed !! boundary methods (IBMs) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 669363305d..4f76567423 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_mhd #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief This module is used to compute source terms for magnetohydrodynamics !! Note: not applicable for 1D diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d87c5df02c..4ec4c463f5 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -4,7 +4,6 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module serves as a proxy to the parameters and subroutines !! available in the MPI implementation's MPI module. Specifically, diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 3521a71c00..c4352d05b3 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_qbmm #:include 'case.fpp' -#:include 'parallel_macros.fpp' #:include 'macros.fpp' !> @brief This module is used to compute moment inversion via qbmm diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 7f73510a31..416f9ca5bc 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -4,7 +4,6 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to calculate the right- !! hane-side (RHS) in the quasi-conservative, shock- and interface- diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 706bb37730..45685f4c13 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -21,7 +21,6 @@ #:include 'case.fpp' #:include 'macros.fpp' -#:include 'parallel_macros.fpp' #:include 'inline_riemann.fpp' module m_riemann_solvers diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index da0aa370cb..aeb4f3933d 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -1,4 +1,5 @@ -#:include 'parallel_macros.fpp' +#:include 'macros.fpp' + module m_sim_helpers use m_derived_types !< Definitions of the derived types diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index a8134173ca..a89d882b82 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_start_up #:include 'case.fpp' -#:include 'parallel_macros.fpp' +#:include 'macros.fpp' !> @brief The purpose of the module is primarily to read in the files that !! contain the inputs, the initial condition data and the grid data diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index a29065c6da..f6181237ee 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -1,5 +1,4 @@ #:include 'macros.fpp' -#:include 'parallel_macros.fpp' #:include 'inline_capillary.fpp' !> @brief This module is used to compute source terms for surface tension model diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 344b27d2c7..658f5abce8 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_time_steppers #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The following module features a variety of time-stepping schemes. !! Currently, it includes the following Runge-Kutta (RK) algorithms: diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 77f939fb8b..da65b0d0a0 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -2,7 +2,6 @@ !! @file m_viscous.f90 !! @brief Contains module m_viscous #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief The module contains the subroutines used to compute viscous terms. module m_viscous diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 7ff75df03d..d965efa377 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -2,7 +2,6 @@ !! @file m_weno.f90 !! @brief Contains module m_weno #:include 'macros.fpp' -#:include 'parallel_macros.fpp' !> @brief Weighted essentially non-oscillatory (WENO) reconstruction scheme !! that is supplemented with monotonicity preserving bounds (MPWENO) From 395bc3ed7ce8647fb645e7e61135ad33044d4234 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 16 Jun 2025 11:37:39 -0400 Subject: [PATCH 48/75] Changed default of routine parallelism argument --- src/common/include/parallel_macros.fpp | 2 +- src/common/m_boundary_common.fpp | 22 ++++++++-------- src/common/m_helper.fpp | 4 +-- src/common/m_helper_basic.fpp | 6 ++--- src/common/m_phase_change.fpp | 12 ++++----- src/common/m_variables_conversion.fpp | 10 ++++---- src/pre_process/m_assign_variables.fpp | 4 +-- src/pre_process/m_patches.fpp | 8 +++--- src/simulation/m_acoustic_src.fpp | 6 ++--- src/simulation/m_bubbles.fpp | 34 ++++++++++++------------- src/simulation/m_bubbles_EL.fpp | 4 +-- src/simulation/m_bubbles_EL_kernels.fpp | 12 ++++----- src/simulation/m_compute_cbc.fpp | 16 ++++++------ src/simulation/m_hyperelastic.fpp | 4 +-- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_qbmm.fpp | 14 +++++----- src/simulation/m_riemann_solvers.fpp | 4 +-- src/simulation/m_sim_helpers.fpp | 6 ++--- 18 files changed, 85 insertions(+), 85 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index de4f20cafe..08f46135a1 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -272,7 +272,7 @@ $:acc_directive #:enddef -#:def GPU_LOOP(collapse=None, parallelism=["seq"], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) +#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) #:set collapse_val = GEN_COLLAPSE_STR(collapse) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index ce5b98a3fb..b521d1272f 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -241,7 +241,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -310,7 +310,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_symmetry #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -574,7 +574,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_periodic #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -717,7 +717,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_axis #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -780,7 +780,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -879,7 +879,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1014,7 +1014,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv @@ -1083,7 +1083,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_qbmm_extrapolation #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc @@ -1282,7 +1282,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_periodic #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1340,7 +1340,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_reflective #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1422,7 +1422,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index 44299f56d6..509d9a3708 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -44,7 +44,7 @@ contains !! @param Rtmp is the bubble radii !! @param ntmp is the output number bubble density pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: Rtmp real(wp), intent(out) :: ntmp @@ -58,7 +58,7 @@ contains end subroutine s_comp_n_from_prim pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: nRtmp real(wp), intent(out) :: ntmp diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 2ba77f0f15..34c96c5113 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -25,7 +25,7 @@ contains !! @param tol_input Relative error (default = 1e-6_wp). !! @return Result of the comparison. logical pure elemental function f_approx_equal(a, b, tol_input) result(res) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: a, b real(wp), optional, intent(in) :: tol_input real(wp) :: tol @@ -48,7 +48,7 @@ contains !> Checks if a real(wp) variable is of default value. !! @param var Variable to check. logical pure elemental function f_is_default(var) result(res) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: var res = f_approx_equal(var, dflt_real) @@ -73,7 +73,7 @@ contains !> Checks if a real(wp) variable is an integer. !! @param var Variable to check. logical pure elemental function f_is_integer(var) result(res) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: var res = f_approx_equal(var, real(nint(var), wp)) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 0a23205d58..f4b1815687 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -289,7 +289,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif ! initializing variables @@ -393,7 +393,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, intent(in) :: j, k, l @@ -518,7 +518,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_correct_partial_densities #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif !> @name variables for the correction of the reacting partial densities @@ -581,7 +581,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_jacobian_matrix #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(2, 2), intent(out) :: InvJac @@ -688,7 +688,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pTg_residue #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, intent(in) :: j, k, l @@ -739,7 +739,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_TSat #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: pSat diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 4bc1426153..08ce822a83 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -120,7 +120,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pressure #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: energy, alf @@ -461,7 +461,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -542,7 +542,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -1603,7 +1603,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_speed_of_sound #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: pres @@ -1673,7 +1673,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: B(3), rho, c diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index 0774d43c88..fa012d0695 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -104,7 +104,7 @@ contains !! @param patch_id_fp Array to track patch ids pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) integer, intent(in) :: patch_id integer, intent(in) :: j, k, l @@ -277,7 +277,7 @@ contains !! @param patch_id_fp Array to track patch ids impure subroutine s_assign_patch_species_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) integer, intent(in) :: patch_id integer, intent(in) :: j, k, l diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index f5245d392a..edab185958 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2381,7 +2381,7 @@ contains end subroutine s_model subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: cyl_y, cyl_z @@ -2392,7 +2392,7 @@ contains pure function f_convert_cyl_to_cart(cyl) result(cart) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) t_vec3, intent(in) :: cyl t_vec3 :: cart @@ -2404,7 +2404,7 @@ contains end function f_convert_cyl_to_cart subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(IN) :: cyl_x, cyl_y @@ -2417,7 +2417,7 @@ contains !! @param offset Thickness !! @param a Starting position pure elemental function f_r(myth, offset, a) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: myth, offset, a real(wp) :: b real(wp) :: f_r diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 2305aff227..e319c0e963 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -346,7 +346,7 @@ contains !! @param gauss_sigma_time_local sigma in time for Gaussian pulse !! @param source Source term amplitude pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) integer, intent(in) :: ai, term_index real(wp), intent(in) :: sim_time, c, sum_BB real(wp), intent(in) :: frequency_local, gauss_sigma_time_local @@ -698,7 +698,7 @@ contains !! @param c Speed of sound !! @return frequency_local Converted frequency pure elemental function f_frequency_local(freq_conv_flag, ai, c) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) logical, intent(in) :: freq_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c @@ -717,7 +717,7 @@ contains !! @param ai Acoustic source index !! @return gauss_sigma_time_local Converted Gaussian sigma time pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 7cf2107522..02e1932c8e 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -39,7 +39,7 @@ contains !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu real(wp), intent(in) :: fCson @@ -80,7 +80,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw(fR0, fR, fV, fpb) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw @@ -99,7 +99,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait real(wp) :: tmp1, tmp2, tmp3 @@ -119,7 +119,7 @@ contains !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy pure elemental function f_cgas(fCpinf, fntait, fBtait, fH) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fCpinf, fntait, fBtait, fH real(wp) :: tmp @@ -142,7 +142,7 @@ contains !! @param advsrc Advection equation source term !! @param divu Divergence of velocity pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu real(wp) :: c2_liquid @@ -172,7 +172,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -208,7 +208,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fCpbw Boundary wall pressure pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw real(wp) :: f_rddot_RP @@ -231,7 +231,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -254,7 +254,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw_KM(fR0, fR, fV, fpb) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -281,7 +281,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -315,7 +315,7 @@ contains !! @param pb Internal bubble pressure !! @param iR0 Current bubble size index pure elemental subroutine s_bwproperty(pb, iR0, chi_vw, k_mw, rho_mw) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: pb integer, intent(in) :: iR0 real(wp), intent(out) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -345,7 +345,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fR real(wp), intent(in) :: fV real(wp), intent(in) :: fpb @@ -403,7 +403,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -466,7 +466,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_step #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf @@ -600,7 +600,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_initial_substep_h #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -686,7 +686,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_substep #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(OUT) :: err real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf @@ -784,7 +784,7 @@ contains !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t integer, intent(IN) :: bub_id diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 0b9933ef8b..71706d18f8 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -741,7 +741,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_cson_from_pinf #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(in) :: pinf, rhol, gamma, pi_inf @@ -811,7 +811,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, intent(in) :: bub_id, ptype type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index d51f33589e..fe644244cc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -203,7 +203,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_applygaussian #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: center integer, dimension(3), intent(in) :: cellaux @@ -273,7 +273,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_check_celloutside #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, dimension(3), intent(inout) :: cellaux logical, intent(out) :: celloutside @@ -309,7 +309,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, dimension(3), intent(inout) :: cellaux integer, dimension(3), intent(in) :: cell @@ -350,7 +350,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_stddsv #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, dimension(3), intent(in) :: cell real(wp), intent(in) :: volpart @@ -391,7 +391,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_char_vol #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif integer, intent(in) :: cellx, celly, cellz real(wp), intent(out) :: Charvol @@ -416,7 +416,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: s_cell integer, dimension(3), intent(out) :: get_cell diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index fbe4b0d6b3..5bdbc6715d 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -88,7 +88,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -106,7 +106,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -136,7 +136,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -153,7 +153,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -177,7 +177,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -199,7 +199,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -221,7 +221,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(sys_size), intent(inout) :: L L(1:advxe) = 0._wp @@ -233,7 +233,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 1e8b475284..ad888c813d 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -220,7 +220,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_neoHookean_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G @@ -259,7 +259,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index a435d17454..7adad082ca 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -750,7 +750,7 @@ contains !> Function that uses the interpolation coefficients and the current state !! at the cell centers in order to estimate the state at the image point pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) type(scalar_field), & dimension(sys_size), & intent(IN) :: q_prim_vf !< Primitive Variables diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index c4352d05b3..40e56ebf9d 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -566,7 +566,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff_nonpoly #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -639,7 +639,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: pres, rho, c @@ -864,7 +864,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff @@ -880,7 +880,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(nmom), intent(in) :: momin real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY @@ -940,7 +940,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_hyqmom #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif real(wp), dimension(2), intent(inout) :: frho, fup real(wp), dimension(3), intent(in) :: fmom @@ -959,7 +959,7 @@ contains end subroutine s_hyqmom pure function f_quad(abscX, abscY, wght_in, q, r, s) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in real(wp), intent(in) :: q, r, s @@ -975,7 +975,7 @@ contains end function f_quad pure function f_quad2D(abscX, abscY, wght_in, pow) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in real(wp), dimension(3), intent(in) :: pow diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 45685f4c13..a55b0c144c 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -4241,7 +4241,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction). pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) implicit none @@ -4275,7 +4275,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction). pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) implicit none diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index aeb4f3933d..9be0c31c2b 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -35,7 +35,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_enthalpy #else - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) #endif type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf @@ -100,7 +100,7 @@ contains !! @param vcfl_sf (optional) cell centered viscous cfl number !! @param Rc_sf (optional) cell centered Rc pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf @@ -197,7 +197,7 @@ contains !! @param k y coordinate !! @param l z coordinate pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) - $:GPU_ROUTINE() + $:GPU_ROUTINE(parallelism=['seq']) real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt From c9b8708b0fb9630aa0aa34da1ce667b55b6317dd Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 17 Jun 2025 14:23:03 -0400 Subject: [PATCH 49/75] Switch to string lists --- src/common/include/parallel_macros.fpp | 77 ++++++---- src/common/m_boundary_common.fpp | 2 +- src/common/m_chemistry.fpp | 2 +- src/common/m_finite_differences.fpp | 2 +- src/common/m_phase_change.fpp | 16 +- src/common/m_variables_conversion.fpp | 26 ++-- src/simulation/m_acoustic_src.fpp | 46 +++--- src/simulation/m_body_forces.fpp | 4 +- src/simulation/m_boundary_conditions.fpp | 8 +- src/simulation/m_bubbles.fpp | 2 +- src/simulation/m_bubbles_EE.fpp | 16 +- src/simulation/m_bubbles_EL.fpp | 76 ++++----- src/simulation/m_bubbles_EL_kernels.fpp | 4 +- src/simulation/m_cbc.fpp | 58 +++---- src/simulation/m_data_output.fpp | 14 +- src/simulation/m_fftw.fpp | 16 +- src/simulation/m_global_parameters.fpp | 188 +++++++++++------------ src/simulation/m_hyperelastic.fpp | 18 +-- src/simulation/m_hypoelastic.fpp | 16 +- src/simulation/m_ibm.fpp | 42 ++--- src/simulation/m_mhd.fpp | 12 +- src/simulation/m_mpi_proxy.fpp | 74 ++++----- src/simulation/m_qbmm.fpp | 30 ++-- src/simulation/m_rhs.fpp | 164 ++++++++++---------- src/simulation/m_riemann_solvers.fpp | 124 +++++++-------- src/simulation/m_sim_helpers.fpp | 6 +- src/simulation/m_start_up.fpp | 64 ++++---- src/simulation/m_surface_tension.fpp | 26 ++-- src/simulation/m_time_steppers.fpp | 10 +- src/simulation/m_viscous.fpp | 34 ++-- src/simulation/m_weno.fpp | 34 ++-- 31 files changed, 614 insertions(+), 597 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 08f46135a1..d34b32065f 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -1,3 +1,4 @@ + #:def ASSERT_LIST(data, datatype) #:assert data is not None #:assert isinstance(data, list) @@ -5,9 +6,28 @@ #:assert all(isinstance(element, datatype) for element in data) #:enddef -#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_list) +#:def GEN_PARENTHESES_CLAUSE(clause_name, clause_str) + #:set clause_regex = re.compile(',(?![^(]*\\))') + #:assert isinstance(clause_name, str) - #:if clause_list is not None + #:if clause_str is not None + #:set count = 0 + #:assert isinstance(clause_str, str) + #:assert clause_str[0] == '[' and clause_str[-1] == ']' + #:for c in clause_str + #:if c == '(' + #:set count = count + 1 + #:elif c == ')' + #:set count = count - 1 + #:endif + #:if count > 1 + #:stop 'Nested parentheses is not supported. Incorrect clause: {}'.format(clause_str) + #:elif count < 0 + #:stop 'Missing parentheses. Incorrect clause: {}'.format(clause_str) + #:endif + #:endfor + #:set clause_str = re.sub(clause_regex, ';', clause_str) + #:set clause_list = [x.strip() for x in clause_str.strip('[]').split(';')] $:ASSERT_LIST(clause_list, str) #:set clause_str = clause_name + '(' + ', '.join(clause_list) + ') ' #:else @@ -33,15 +53,10 @@ #:def GEN_COPYIN_STR(copyin, readonly) #:assert isinstance(readonly, bool) - #:if copyin is not None - $:ASSERT_LIST(copyin, str) - #:if readonly == True - #:set copyin_val = 'copyin(readonly:' + ', '.join(copyin) + ') ' - #:else - #:set copyin_val = 'copyin(' + ', '.join(copyin) + ') ' - #:endif - #:else - #:set copyin_val = '' + #:set copyin_val = GEN_PARENTHESES_CLAUSE('copyin', copyin) + #:if copyin is not None and readonly == True + #:set index = copyin_val.find('copyin(') + len('copyin(') + #:set copyin_val = copyin_val[:index] + 'readonly:' + copyin_val[index:] #:endif $:copyin_val #:enddef @@ -137,21 +152,26 @@ #:def GEN_REDUCTION_STR(reduction, reductionOp) #:if reduction is not None and reductionOp is not None - #:if isinstance(reduction, list) and isinstance(reductionOp, list) - $:ASSERT_LIST(reduction, list) - $:ASSERT_LIST(reductionOp, str) - #:assert all(len(element) != 0 for element in reduction) - #:assert all(isinstance(element, str) for sublist in reduction for element in sublist) - #:assert len(reduction) == len(reductionOp) - #:set reduction_list = ['reduction(' + op + ':' + ', '.join(red) + ') ' for (red, op) in zip(reduction, reductionOp)] - #:set reduction_val = ' '. join(reduction_list) + ' ' - #:elif isinstance(reduction, list) and isinstance(reductionOp, str) - $:ASSERT_LIST(reduction, str) - #:assert isinstance(reductionOp, str) - #:set reduction_val = 'reduction(' + reductionOp + ':' + ', '.join(reduction) + ') ' - #:else - #:stop 'Invalid datatypes for reduction or reductionOp. Must be list of lists and lists or list and str respectively' - #:endif + #:assert isinstance(reduction, str) + #:assert isinstance(reductionOp, str) + #:assert reduction[0] == '[' and reduction[-1] == ']' + #:assert reductionOp[0] == '[' and reductionOp[-1] == ']' + #:set reduction = reduction.replace(' ', '') + #:set reduction = reduction[1:-1] + #:set reduction_list = reduction.split('],') + #:set reduction_list = [str + ']' for str in reduction_list] + #:assert all(str[0] == '[' and str[-1] == ']' for str in reduction_list) + + #:set reductionOp_list = [x.strip() for x in reductionOp.strip('[]').split(',')] + $:ASSERT_LIST(reduction_list, str) + $:ASSERT_LIST(reductionOp_list, str) + #:assert len(reduction_list) == len(reductionOp_list) + #:set reduction_val = '' + #:for i in range(len(reduction_list)) + #:set temp_clause = GEN_PARENTHESES_CLAUSE('reduction', reduction_list[i]).strip('\n') + #:set ind = temp_clause.find('reduction(') + len('reduction(') + #:set reduction_val = reduction_val.strip('\n') + temp_clause[:ind] + reductionOp_list[i] + ':' + temp_clause[ind:] + #:endfor #:elif reduction is not None or reductionOp is not None #:stop 'Cannot set the reduction list or reduction operation without setting the other' #:else @@ -235,12 +255,9 @@ #:if not isinstance(function_name, str) #:stop "When inlining for Cray Compiler, function name must be given and given as a string" #:endif -#ifdef _CRAYFTN #:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n') + #:set cray_directive = '#ifdef _CRAYFTN\n' + cray_directive + '\n#else\n' + acc_directive + '\n#endif' $:cray_directive -#else - $:acc_directive -#endif #:else $:acc_directive #:endif diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index b521d1272f..b9b83f9fc1 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -20,7 +20,7 @@ module m_boundary_common implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - $:GPU_DECLARE(create=["bc_buffers"]) + $:GPU_DECLARE(create='[bc_buffers]') real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 3ff223844f..b1cac0a70b 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -99,7 +99,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:GPU_PARALLEL_LOOP(collapse=3, private=["Ys", "omega"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index c9bf9a4ad2..1857a31cd8 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,7 +18,7 @@ contains real(wp) :: divergence - $:GPU_PARALLEL_LOOP(collapse=3, private=["divergence"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index f4b1815687..e66b2951a6 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -40,7 +40,7 @@ module m_phase_change real(wp) :: A, B, C, D !> @} - $:GPU_DECLARE(create=["max_iter","pCr","TCr","mixM","lp","vp","A","B","C","D"]) + $:GPU_DECLARE(create='[max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D]') contains @@ -87,20 +87,20 @@ contains real(wp) :: rho, rM, m1, m2, MCT !< total density, total reacting mass, individual reacting masses real(wp) :: TvF !< total volume fraction - $:GPU_DECLARE(create=["pS","pSOV","pSSL","TS","TSOV","TSSL","TSatOV","TSatSL"]) - $:GPU_DECLARE(create=["rhoe","dynE","rhos","rho","rM","m1","m2","MCT","TvF"]) + $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]') + $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]') real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok - $:GPU_DECLARE(create=["p_infOV","p_infpT","p_infSL","sk","hk","gk","ek","rhok"]) + $:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]') !< Generic loop iterators integer :: i, j, k, l ! starting equilibrium solver - $:GPU_PARALLEL_LOOP(collapse=3, private=["p_infOV", "p_infpT", "p_infSL", & - "sk", "hk", "gk", "ek", "rhok", "pS", "pSOV", "pSSL", & - "TS", "TSOV", "TSatOV", "TSatSL", "TSSL", "rhoe", & - "dynE", "rhos", "rho", "rM", "m1", "m2", "MCT", "TvF"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, & + sk, hk, gk, ek, rhok,pS, pSOV, pSSL, & + TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, & + dynE, rhos, rho, rM, m1, m2, MCT, TvF]') do j = 0, m do k = 0, n do l = 0, p diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 08ce822a83..2aadab4aaf 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -50,16 +50,16 @@ module m_variables_conversion !! In simulation, gammas, pi_infs, and qvs are already declared in m_global_variables #ifndef MFC_SIMULATION real(wp), allocatable, public, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - $:GPU_DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) + $:GPU_DECLARE(create='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps]') #endif real(wp), allocatable, dimension(:) :: Gs integer, allocatable, dimension(:) :: bubrs real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create=["bubrs","Gs","Res"]) + $:GPU_DECLARE(create='[bubrs,Gs,Res]') integer :: is1b, is2b, is3b, is1e, is2e, is3e - $:GPU_DECLARE(create=["is1b","is2b","is3b","is1e","is2e","is3e"]) + $:GPU_DECLARE(create='[is1b,is2b,is3b,is1e,is2e,is3e]') real(wp), allocatable, dimension(:, :, :), public :: rho_sf !< Scalar density function real(wp), allocatable, dimension(:, :, :), public :: gamma_sf !< Scalar sp. heat ratio function @@ -610,7 +610,7 @@ contains integer :: i, j - $:GPU_ENTER_DATA(copyin=["is1b","is1e","is2b","is2e","is3b","is3e"]) + $:GPU_ENTER_DATA(copyin='[is1b,is1e,is2b,is2e,is3b,is3e]') #ifdef MFC_SIMULATION @:ALLOCATE(gammas (1:num_fluids)) @@ -642,7 +642,7 @@ contains qvs(i) = fluid_pp(i)%qv qvps(i) = fluid_pp(i)%qvp end do - $:GPU_UPDATE(device=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps","Gs"]) + $:GPU_UPDATE(device='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps,Gs]') #ifdef MFC_SIMULATION @@ -654,7 +654,7 @@ contains end do end do - $:GPU_UPDATE(device=["Res","Re_idx","Re_size"]) + $:GPU_UPDATE(device='[Res,Re_idx,Re_size]') end if #endif @@ -668,7 +668,7 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - $:GPU_UPDATE(device=["bubrs"]) + $:GPU_UPDATE(device='[bubrs]') end if #ifdef MFC_POST_PROCESS @@ -869,9 +869,9 @@ contains end if #:endif - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "Re_K", & - "nRtmp", "rho_K", "gamma_K", "pi_inf_K", "qv_K", & - "dyn_pres_K", "rhoYks", "B"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, & + & nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, & + & dyn_pres_K, rhoYks, B]') do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end @@ -1461,13 +1461,13 @@ contains is2b = is2%beg; is2e = is2%end is3b = is3%beg; is3e = is3%end - $:GPU_UPDATE(device=["is1b","is2b","is3b","is1e","is2e","is3e"]) + $:GPU_UPDATE(device='[is1b,is2b,is3b,is1e,is2e,is3e]') ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_K", "vel_K", & - "alpha_K", "Re_K", "Y_K"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, & + & alpha_K, Re_K, Y_K]') do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index e319c0e963..c604d2bb35 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -23,43 +23,43 @@ module m_acoustic_src private; public :: s_initialize_acoustic_src, s_precalculate_acoustic_spatial_sources, s_acoustic_src_calculations integer, allocatable, dimension(:) :: pulse, support - $:GPU_DECLARE(create=["pulse","support"]) + $:GPU_DECLARE(create='[pulse,support]') logical, allocatable, dimension(:) :: dipole - $:GPU_DECLARE(create=["dipole"]) + $:GPU_DECLARE(create='[dipole]') real(wp), allocatable, target, dimension(:, :) :: loc_acoustic - $:GPU_DECLARE(create=["loc_acoustic"]) + $:GPU_DECLARE(create='[loc_acoustic]') real(wp), allocatable, dimension(:) :: mag, length, height, wavelength, frequency real(wp), allocatable, dimension(:) :: gauss_sigma_dist, gauss_sigma_time, npulse, dir, delay - $:GPU_DECLARE(create=["mag","length","height","wavelength","frequency"]) - $:GPU_DECLARE(create=["gauss_sigma_dist","gauss_sigma_time","npulse","dir","delay"]) + $:GPU_DECLARE(create='[mag,length,height,wavelength,frequency]') + $:GPU_DECLARE(create='[gauss_sigma_dist,gauss_sigma_time,npulse,dir,delay]') real(wp), allocatable, dimension(:) :: foc_length, aperture - $:GPU_DECLARE(create=["foc_length","aperture"]) + $:GPU_DECLARE(create='[foc_length,aperture]') real(wp), allocatable, dimension(:) :: element_spacing_angle, element_polygon_ratio, rotate_angle - $:GPU_DECLARE(create=["element_spacing_angle","element_polygon_ratio","rotate_angle"]) + $:GPU_DECLARE(create='[element_spacing_angle,element_polygon_ratio,rotate_angle]') real(wp), allocatable, dimension(:) :: bb_bandwidth, bb_lowest_freq - $:GPU_DECLARE(create=["bb_bandwidth","bb_lowest_freq"]) + $:GPU_DECLARE(create='[bb_bandwidth,bb_lowest_freq]') integer, allocatable, dimension(:) :: num_elements, element_on, bb_num_freq - $:GPU_DECLARE(create=["num_elements","element_on","bb_num_freq"]) + $:GPU_DECLARE(create='[num_elements,element_on,bb_num_freq]') !> @name Acoustic source terms !> @{ real(wp), allocatable, dimension(:, :, :) :: mass_src, e_src real(wp), allocatable, dimension(:, :, :, :) :: mom_src !> @} - $:GPU_DECLARE(create=["mass_src","e_src","mom_src"]) + $:GPU_DECLARE(create='[mass_src,e_src,mom_src]') integer, dimension(:), allocatable :: source_spatials_num_points !< Number of non-zero source grid points for each source - $:GPU_DECLARE(create=["source_spatials_num_points"]) + $:GPU_DECLARE(create='[source_spatials_num_points]') type(source_spatial_type), dimension(:), allocatable :: source_spatials !< Data of non-zero source grid points for each source - $:GPU_DECLARE(create=["source_spatials"]) + $:GPU_DECLARE(create='[source_spatials]') contains @@ -110,12 +110,12 @@ contains delay(i) = acoustic(i)%delay end if end do - $:GPU_UPDATE(device=["loc_acoustic","mag","dipole","support","length", & - & "height","wavelength","frequency","gauss_sigma_dist", & - & "gauss_sigma_time","foc_length","aperture","npulse","pulse", & - & "dir","delay","element_polygon_ratio","rotate_angle", & - & "element_spacing_angle","num_elements","element_on", & - & "bb_num_freq","bb_bandwidth","bb_lowest_freq"]) + $:GPU_UPDATE(device='[loc_acoustic,mag,dipole,support,length, & + & height,wavelength,frequency,gauss_sigma_dist, & + & gauss_sigma_time,foc_length,aperture,npulse,pulse, & + & dir,delay,element_polygon_ratio,rotate_angle, & + & element_spacing_angle,num_elements,element_on, & + & bb_num_freq,bb_bandwidth,bb_lowest_freq]') @:ALLOCATE(mass_src(0:m, 0:n, 0:p)) @:ALLOCATE(mom_src(1:num_vels, 0:m, 0:n, 0:p)) @@ -219,7 +219,7 @@ contains deallocate (phi_rn) - $:GPU_PARALLEL_LOOP(private=["myalpha","myalpha_rho"]) + $:GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') do i = 1, num_points j = source_spatials(ai)%coord(1, i) k = source_spatials(ai)%coord(2, i) @@ -473,14 +473,14 @@ contains call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials') end if - $:GPU_UPDATE(device=["source_spatials(ai)%coord"]) - $:GPU_UPDATE(device=["source_spatials(ai)%val"]) + $:GPU_UPDATE(device='[source_spatials(ai)%coord]') + $:GPU_UPDATE(device='[source_spatials(ai)%val]') if (support(ai) >= 5) then if (dim == 2) then - $:GPU_UPDATE(device=["source_spatials(ai)%angle"]) + $:GPU_UPDATE(device='[source_spatials(ai)%angle]') end if if (dim == 3) then - $:GPU_UPDATE(device=["source_spatials(ai)%xyz_to_r_ratios"]) + $:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]') end if end if diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 794e7be075..d5f811d273 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -22,7 +22,7 @@ module m_body_forces s_finalize_body_forces_module real(wp), allocatable, dimension(:, :, :) :: rhoM - $:GPU_DECLARE(create=["rhoM"]) + $:GPU_DECLARE(create='[rhoM]') contains @@ -67,7 +67,7 @@ contains end if end if - $:GPU_UPDATE(device=["accel_bf"]) + $:GPU_UPDATE(device='[accel_bf]') end subroutine s_compute_acceleration diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index 5589ee23aa..e484c62a68 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -41,7 +41,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_type(dir, loc)%sf - $:GPU_UPDATE(device=["bc_type(dir, loc)%sf"]) + $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]') end do end do close (1) @@ -57,7 +57,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_buffers(dir, loc)%sf - $:GPU_UPDATE(device=["bc_buffers(dir, loc)%sf"]) + $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]') end do end do close (1) @@ -105,7 +105,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_type(dir, loc)%sf) - $:GPU_UPDATE(device=["bc_type(dir, loc)%sf"]) + $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]') end do end do @@ -115,7 +115,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_buffers(dir, loc)%sf) - $:GPU_UPDATE(device=["bc_buffers(dir, loc)%sf"]) + $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]') end do end do diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 02e1932c8e..0443e8f182 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -20,7 +20,7 @@ module m_bubbles real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) real(wp) :: k_mw !< Bubble wall properties (Ando 2010) real(wp) :: rho_mw !< Bubble wall properties (Ando 2010) - $:GPU_DECLARE(create=["chi_vw","k_mw","rho_mw"]) + $:GPU_DECLARE(create='[chi_vw,k_mw,rho_mw]') contains diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 201a17d6ab..c7d85c90f0 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -21,13 +21,13 @@ module m_bubbles_EE real(wp), allocatable, dimension(:, :, :) :: bub_adv_src real(wp), allocatable, dimension(:, :, :, :) :: bub_r_src, bub_v_src, bub_p_src, bub_m_src - $:GPU_DECLARE(create=["bub_adv_src","bub_r_src","bub_v_src","bub_p_src","bub_m_src"]) + $:GPU_DECLARE(create='[bub_adv_src,bub_r_src,bub_v_src,bub_p_src,bub_m_src]') type(scalar_field) :: divu !< matrix for div(u) - $:GPU_DECLARE(create=["divu"]) + $:GPU_DECLARE(create='[divu]') integer, allocatable, dimension(:) :: rs, vs, ms, ps - $:GPU_DECLARE(create=["rs","vs","ms","ps"]) + $:GPU_DECLARE(create='[rs,vs,ms,ps]') contains @@ -51,9 +51,9 @@ contains end if end do - $:GPU_UPDATE(device=["rs", "vs"]) + $:GPU_UPDATE(device='[rs, vs]') if (.not. polytropic) then - $:GPU_UPDATE(device=["ps", "ms"]) + $:GPU_UPDATE(device='[ps, ms]') end if @:ALLOCATE(divu%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) @@ -190,9 +190,9 @@ contains end do adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(collapse=3, private=["Rtmp", "Vtmp", "myalpha_rho", "myalpha"], & - reduction=["adap_dt_stop_max"], reductionOp="MAX", & - copy=["adap_dt_stop_max"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & + & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & + & copy='[adap_dt_stop_max]') do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 71706d18f8..4489c2c850 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -32,26 +32,26 @@ module m_bubbles_EL real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius - $:GPU_DECLARE(create=["lag_id", "bub_R0", "Rmax_stats", "Rmin_stats"]) + $:GPU_DECLARE(create='[lag_id, bub_R0, Rmax_stats, Rmin_stats]') real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) - $:GPU_DECLARE(create=["gas_mg", "gas_betaT", "gas_betaC", "bub_dphidt"]) + $:GPU_DECLARE(create='[gas_mg, gas_betaT, gas_betaC, bub_dphidt]') !(nBub, 1 -> actual val or 2 -> temp val) real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface - $:GPU_DECLARE(create=["gas_p", "gas_mv", "intfc_rad", "intfc_vel"]) + $:GPU_DECLARE(create='[gas_p, gas_mv, intfc_rad, intfc_vel]') !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format - $:GPU_DECLARE(create=["mtn_pos", "mtn_posPrev", "mtn_vel", "mtn_s"]) + $:GPU_DECLARE(create='[mtn_pos, mtn_posPrev, mtn_vel, mtn_s]') !(nBub, 1-> x or 2->y or 3 ->z, time-stage) real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity @@ -59,18 +59,18 @@ module m_bubbles_EL real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity - $:GPU_DECLARE(create=["intfc_draddt", "intfc_dveldt", "gas_dpdt", "gas_dmvdt", "mtn_dposdt", "mtn_dveldt"]) + $:GPU_DECLARE(create='[intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt]') integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme - $:GPU_DECLARE(create=["lag_num_ts"]) + $:GPU_DECLARE(create='[lag_num_ts]') integer :: nBubs !< Number of bubbles in the local domain real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework integer :: q_beta_idx !< Size of the q_beta vector field - $:GPU_DECLARE(create=["nBubs","Rmax_glb","Rmin_glb","q_beta","q_beta_idx"]) + $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') contains @@ -100,7 +100,7 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if - $:GPU_UPDATE(device=["lag_num_ts", "q_beta_idx"]) + $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]') @:ALLOCATE(q_beta%vf(1:q_beta_idx)) @@ -248,19 +248,19 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - $:GPU_UPDATE(device=["bubbles_lagrange", "lag_params"]) + $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') - $:GPU_UPDATE(device=["lag_id","bub_R0","Rmax_stats","Rmin_stats","gas_mg", & - & "gas_betaT","gas_betaC","bub_dphidt","gas_p","gas_mv", & - & "intfc_rad","intfc_vel","mtn_pos","mtn_posPrev","mtn_vel", & - & "mtn_s","intfc_draddt","intfc_dveldt","gas_dpdt","gas_dmvdt", & - & "mtn_dposdt","mtn_dveldt","nBubs"]) + $:GPU_UPDATE(device='[lag_id,bub_R0,Rmax_stats,Rmin_stats,gas_mg, & + & gas_betaT,gas_betaC,bub_dphidt,gas_p,gas_mv, & + & intfc_rad,intfc_vel,mtn_pos,mtn_posPrev,mtn_vel, & + & mtn_s,intfc_draddt,intfc_dveldt,gas_dpdt,gas_dmvdt, & + & mtn_dposdt,mtn_dveldt,nBubs]') Rmax_glb = min(dflt_real, -dflt_real) Rmin_glb = max(dflt_real, -dflt_real) - $:GPU_UPDATE(device=["Rmax_glb", "Rmin_glb"]) + $:GPU_UPDATE(device='[Rmax_glb, Rmin_glb]') - $:GPU_UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) + $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]') !Populate temporal variables call s_transfer_data_to_tmp() @@ -527,7 +527,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - $:GPU_PARALLEL_LOOP(private=["k","cell"]) + $:GPU_PARALLEL_LOOP(private='[k,cell]') do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -548,9 +548,9 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private=["k","myalpha_rho","myalpha","Re","cell"], & - & reduction=["adap_dt_stop_max"],reductionOp="MAX", & - & copy=["adap_dt_stop_max"],copyin=["stage"]) + $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & + & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs ! Keller-Miksis model @@ -618,7 +618,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - $:GPU_PARALLEL_LOOP(collapse=2, private=["k"], copyin=["stage"]) + $:GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp @@ -1027,7 +1027,7 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1043,13 +1043,13 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') call s_write_lag_particles(mytime) end if elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1061,7 +1061,7 @@ contains end do elseif (stage == 2) then - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp @@ -1077,7 +1077,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') call s_write_lag_particles(mytime) end if @@ -1085,7 +1085,7 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1097,7 +1097,7 @@ contains end do elseif (stage == 2) then - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp @@ -1108,7 +1108,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do elseif (stage == 3) then - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) @@ -1124,7 +1124,7 @@ contains if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') call s_write_lag_particles(mytime) end if @@ -1194,7 +1194,7 @@ contains integer :: k - $:GPU_PARALLEL_LOOP(private=["k"]) + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) @@ -1416,9 +1416,9 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(collapse=3, reduction=[["lag_vol", "lag_void_avg"], & - ["lag_void_max"]], reductionOp=["+", "MAX"], & - copy=["lag_vol", "lag_void_avg", "lag_void_max"]) + $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], & + & [lag_void_max]]', reductionOp='[+, MAX]', & + & copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m @@ -1602,8 +1602,8 @@ contains integer :: k - $:GPU_PARALLEL_LOOP(reduction=[["Rmax_glb"], ["Rmin_glb"]], & - & reductionOp=["MAX", "MIN"], copy=["Rmax_glb","Rmin_glb"]) + $:GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & + & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') do k = 1, nBubs Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) @@ -1622,7 +1622,7 @@ contains write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' file_loc = trim(case_dir)//'/D/'//trim(file_loc) - $:GPU_UPDATE(host=["Rmax_glb","Rmin_glb"]) + $:GPU_UPDATE(host='[Rmax_glb,Rmin_glb]') open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' @@ -1675,7 +1675,7 @@ contains end do nBubs = nBubs - 1 - $:GPU_UPDATE(device=["nBubs"]) + $:GPU_UPDATE(device='[nBubs]') end subroutine s_remove_lag_bubble diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index fe644244cc..daac6b8238 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -55,7 +55,7 @@ contains real(wp), dimension(3) :: s_coord integer :: l - $:GPU_PARALLEL_LOOP(private=["l","s_coord","cell"]) + $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') do l = 1, nBubs volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp @@ -120,7 +120,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - $:GPU_PARALLEL_LOOP(private=["nodecoord","l","s_coord","cell","center"], copyin=["smearGrid","smearGridz"]) + $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 07b859e085..83ce6a864c 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -48,7 +48,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf - $:GPU_DECLARE(create=["q_prim_rsx_vf","q_prim_rsy_vf","q_prim_rsz_vf"]) + $:GPU_DECLARE(create='[q_prim_rsx_vf,q_prim_rsy_vf,q_prim_rsz_vf]') type(scalar_field), allocatable, dimension(:) :: F_rs_vf, F_src_rs_vf !< @@ -58,7 +58,7 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: F_rsx_vf, F_src_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsy_vf, F_src_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: F_rsz_vf, F_src_rsz_vf !< - $:GPU_DECLARE(create=["F_rsx_vf","F_src_rsx_vf","F_rsy_vf","F_src_rsy_vf","F_rsz_vf","F_src_rsz_vf"]) + $:GPU_DECLARE(create='[F_rsx_vf,F_src_rsx_vf,F_rsy_vf,F_src_rsy_vf,F_rsz_vf,F_src_rsz_vf]') !! There is a CCE bug that is causing some subset of these variables to interfere !! with variables of the same name in m_riemann_solvers.fpp, and giving this versions @@ -69,14 +69,14 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf_l, flux_src_rsx_vf_l !< real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf_l, flux_src_rsy_vf_l real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf_l, flux_src_rsz_vf_l - $:GPU_DECLARE(create=["flux_rsx_vf_l","flux_src_rsx_vf_l","flux_rsy_vf_l","flux_src_rsy_vf_l","flux_rsz_vf_l","flux_src_rsz_vf_l"]) + $:GPU_DECLARE(create='[flux_rsx_vf_l,flux_src_rsx_vf_l,flux_rsy_vf_l,flux_src_rsy_vf_l,flux_rsz_vf_l,flux_src_rsz_vf_l]') real(wp) :: c !< Cell averaged speed of sound real(wp), dimension(2) :: Re !< Cell averaged Reynolds numbers - $:GPU_DECLARE(create=["c","Re"]) + $:GPU_DECLARE(create='[c,Re]') real(wp) :: dpres_ds !< Spatial derivatives in s-dir of pressure - $:GPU_DECLARE(create=["dpres_ds"]) + $:GPU_DECLARE(create='[dpres_ds]') real(wp), allocatable, dimension(:) :: ds !< Cell-width distribution in the s-direction @@ -96,21 +96,21 @@ module m_cbc real(wp), allocatable, dimension(:, :, :) :: pi_coef_y !< Polynomial interpolant coefficients in y-dir real(wp), allocatable, dimension(:, :, :) :: pi_coef_z !< Polynomial interpolant coefficients in z-dir - $:GPU_DECLARE(create=["ds","fd_coef_x","fd_coef_y","fd_coef_z","pi_coef_x","pi_coef_y","pi_coef_z"]) + $:GPU_DECLARE(create='[ds,fd_coef_x,fd_coef_y,fd_coef_z,pi_coef_x,pi_coef_y,pi_coef_z]') !! The first dimension of the array identifies the polynomial, the !! second dimension identifies the position of its coefficients and the last !! dimension denotes the location of the CBC. type(int_bounds_info) :: is1, is2, is3 !< Indical bounds in the s1-, s2- and s3-directions - $:GPU_DECLARE(create=["is1","is2","is3"]) + $:GPU_DECLARE(create='[is1,is2,is3]') integer :: dj integer :: bcxb, bcxe, bcyb, bcye, bczb, bcze integer :: cbc_dir, cbc_loc integer :: flux_cbc_index - $:GPU_DECLARE(create=["dj","bcxb","bcxe","bcyb","bcye","bczb","bcze"]) - $:GPU_DECLARE(create=["cbc_dir", "cbc_loc","flux_cbc_index"]) + $:GPU_DECLARE(create='[dj,bcxb,bcxe,bcyb,bcye,bczb,bcze]') + $:GPU_DECLARE(create='[cbc_dir, cbc_loc,flux_cbc_index]') !! GRCBC inputs for subsonic inflow and outflow conditions consisting of !! inflow velocities, pressure, density and void fraction as well as @@ -119,9 +119,9 @@ module m_cbc real(wp), allocatable, dimension(:) :: pres_in, pres_out, Del_in, Del_out real(wp), allocatable, dimension(:, :) :: vel_in, vel_out real(wp), allocatable, dimension(:, :) :: alpha_rho_in, alpha_in - $:GPU_DECLARE(create=["pres_in","pres_out","Del_in","Del_out"]) - $:GPU_DECLARE(create=["vel_in","vel_out"]) - $:GPU_DECLARE(create=["alpha_rho_in","alpha_in"]) + $:GPU_DECLARE(create='[pres_in,pres_out,Del_in,Del_out]') + $:GPU_DECLARE(create='[vel_in,vel_out]') + $:GPU_DECLARE(create='[alpha_rho_in,alpha_in]') contains @@ -138,7 +138,7 @@ contains else flux_cbc_index = adv_idx%end end if - $:GPU_UPDATE(device=["flux_cbc_index"]) + $:GPU_UPDATE(device='[flux_cbc_index]') call s_any_cbc_boundaries(is_cbc) @@ -388,8 +388,8 @@ contains end if - $:GPU_UPDATE(device=["fd_coef_x","fd_coef_y","fd_coef_z", & - & "pi_coef_x","pi_coef_y","pi_coef_z"]) + $:GPU_UPDATE(device='[fd_coef_x,fd_coef_y,fd_coef_z, & + & pi_coef_x,pi_coef_y,pi_coef_z]') ! Associating the procedural pointer to the appropriate subroutine ! that will be utilized in the conversion to the mixture variables @@ -397,20 +397,20 @@ contains bcxb = bc_x%beg bcxe = bc_x%end - $:GPU_UPDATE(device=["bcxb", "bcxe"]) + $:GPU_UPDATE(device='[bcxb, bcxe]') if (n > 0) then bcyb = bc_y%beg bcye = bc_y%end - $:GPU_UPDATE(device=["bcyb", "bcye"]) + $:GPU_UPDATE(device='[bcyb, bcye]') end if if (p > 0) then bczb = bc_z%beg bcze = bc_z%end - $:GPU_UPDATE(device=["bczb", "bcze"]) + $:GPU_UPDATE(device='[bczb, bcze]') end if ! Allocate GRCBC inputs @@ -442,8 +442,8 @@ contains end do end if #:endfor - $:GPU_UPDATE(device=["vel_in","vel_out","pres_in","pres_out", & - & "Del_in","Del_out","alpha_rho_in","alpha_in"]) + $:GPU_UPDATE(device='[vel_in,vel_out,pres_in,pres_out, & + & Del_in,Del_out,alpha_rho_in,alpha_in]') end subroutine s_initialize_cbc_module @@ -607,7 +607,7 @@ contains end if - $:GPU_UPDATE(device=["ds"]) + $:GPU_UPDATE(device='[ds]') end subroutine s_associate_cbc_coefficients_pointers @@ -683,7 +683,7 @@ contains cbc_dir = cbc_dir_norm cbc_loc = cbc_loc_norm - $:GPU_UPDATE(device=["cbc_dir", "cbc_loc"]) + $:GPU_UPDATE(device='[cbc_dir, cbc_loc]') call s_initialize_cbc(q_prim_vf, flux_vf, flux_src_vf, & ix, iy, iz) @@ -775,10 +775,10 @@ contains end if ! FD2 or FD4 of RHS at j = 0 - $:GPU_PARALLEL_LOOP(collapse=2, private=["alpha_rho", "vel", "adv", & - "mf", "dvel_ds", "dadv_ds", "Re_cbc", "dalpha_rho_ds","dvel_dt", & - "dadv_dt", "dalpha_rho_dt", "L", "lambda", "Ys", "dYs_dt", & - "dYs_ds", "h_k", "Cp_i", "Gamma_i", "Xs"]) + $:GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv, & + & mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, & + & dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, & + & dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1163,8 +1163,8 @@ contains end if dj = max(0, cbc_loc) - $:GPU_UPDATE(device=["is1","is2","is3","dj"]) - $:GPU_UPDATE(device=["dir_idx","dir_flg"]) + $:GPU_UPDATE(device='[is1,is2,is3,dj]') + $:GPU_UPDATE(device='[dir_idx,dir_flg]') ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then @@ -1414,7 +1414,7 @@ contains ! Determining the indicial shift based on CBC location dj = max(0, cbc_loc) - $:GPU_UPDATE(device=["dj"]) + $:GPU_UPDATE(device='[dj]') ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 2cec4cbe38..5a4c8e0aa1 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -54,14 +54,14 @@ module m_data_output real(wp), allocatable, dimension(:, :, :) :: ccfl_sf !< CCFL stability criterion real(wp), allocatable, dimension(:, :, :) :: Rc_sf !< Rc stability criterion real(wp), public, allocatable, dimension(:, :) :: c_mass - $:GPU_DECLARE(create=["icfl_sf","vcfl_sf","ccfl_sf","Rc_sf"]) + $:GPU_DECLARE(create='[icfl_sf,vcfl_sf,ccfl_sf,Rc_sf]') real(wp) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids real(wp) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids real(wp) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids real(wp) :: Rc_min_loc, Rc_min_glb !< Rc stability extrema on local and global grids - $:GPU_DECLARE(create=["icfl_max_loc","icfl_max_glb","vcfl_max_loc","vcfl_max_glb"]) - $:GPU_DECLARE(create=["ccfl_max_loc","ccfl_max_glb","Rc_min_loc","Rc_min_glb"]) + $:GPU_DECLARE(create='[icfl_max_loc,icfl_max_glb,vcfl_max_loc,vcfl_max_glb]') + $:GPU_DECLARE(create='[ccfl_max_loc,ccfl_max_glb,Rc_min_loc,Rc_min_glb]') !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps !> @{ @@ -278,7 +278,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - $:GPU_PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -302,10 +302,10 @@ contains ! Determining local stability criteria extrema at current time-step #ifdef _CRAYFTN - $:GPU_UPDATE(host=["icfl_sf"]) + $:GPU_UPDATE(host='[icfl_sf]') if (viscous) then - $:GPU_UPDATE(host=["vcfl_sf","Rc_sf"]) + $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') end if icfl_max_loc = maxval(icfl_sf) @@ -526,7 +526,7 @@ contains if (prim_vars_wrt .or. (n == 0 .and. p == 0)) then call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwint) do i = 1, sys_size - $:GPU_UPDATE(host=["q_prim_vf(i)%sf(:,:,:)"]) + $:GPU_UPDATE(host='[q_prim_vf(i)%sf(:,:,:)]') end do ! q_prim_vf(bubxb) stores the value of nb needed in riemann solvers, so replace with true primitive value (=1._wp) if (qbmm) then diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 4646f574a1..b30bdbcf93 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -46,12 +46,12 @@ module m_fftw !! Filtered complex data in Fourier space #if defined(MFC_OpenACC) - $:GPU_DECLARE(create=["real_size","cmplx_size","x_size","batch_size","Nfq"]) + $:GPU_DECLARE(create='[real_size,cmplx_size,x_size,batch_size,Nfq]') real(dp), allocatable, target :: data_real_gpu(:) complex(dp), allocatable, target :: data_cmplx_gpu(:) complex(dp), allocatable, target :: data_fltr_cmplx_gpu(:) - $:GPU_DECLARE(create=["data_real_gpu","data_cmplx_gpu","data_fltr_cmplx_gpu"]) + $:GPU_DECLARE(create='[data_real_gpu,data_cmplx_gpu,data_fltr_cmplx_gpu]') #if defined(__PGI) integer :: fwd_plan_gpu, bwd_plan_gpu @@ -89,8 +89,8 @@ contains gpu_fft_size(1) = real_size; iembed(1) = 0 oembed(1) = 0 - $:GPU_ENTER_DATA(copyin=["real_size","cmplx_size","x_size","sys_size","batch_size","Nfq"]) - $:GPU_UPDATE(device=["real_size","cmplx_size","x_size","sys_size","batch_size"]) + $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]') + $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]') #else ! Allocate input and output DFT data sizes fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) @@ -171,7 +171,7 @@ contains #endif !$acc end host_data Nfq = 3 - $:GPU_UPDATE(device=["Nfq"]) + $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size @@ -212,7 +212,7 @@ contains end do end do - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate=["i"]) + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -231,7 +231,7 @@ contains !$acc end host_data Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:GPU_UPDATE(device=["Nfq"]) + $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size @@ -251,7 +251,7 @@ contains #endif !$acc end host_data - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate=["i"]) + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index b5a174e0e0..f869a31ac0 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -53,7 +53,7 @@ module m_global_parameters logical :: cyl_coord integer :: grid_geometry !> @} - $:GPU_DECLARE(create=["cyl_coord","grid_geometry"]) + $:GPU_DECLARE(create='[cyl_coord,grid_geometry]') !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively !> @{ @@ -76,7 +76,7 @@ module m_global_parameters real(wp) :: dt !< Size of the time-step - $:GPU_DECLARE(create=["x_cb","y_cb","z_cb","x_cc","y_cc","z_cc","dx","dy","dz","dt","m","n","p"]) + $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') !> @name Starting time-step iteration, stopping time-step iteration and the number !! of time-step iterations between successive solution backups, respectively @@ -90,7 +90,7 @@ module m_global_parameters real(wp) :: t_stop, t_save, cfl_target integer :: n_start !> @} - $:GPU_DECLARE(create=["cfl_target"]) + $:GPU_DECLARE(create='[cfl_target]') logical :: cfl_adap_dt, cfl_const_dt, cfl_dt @@ -158,7 +158,7 @@ module m_global_parameters logical :: bulk_stress !< Bulk stresses logical :: cont_damage !< Continuum damage modeling - $:GPU_DECLARE(create=["chemistry"]) + $:GPU_DECLARE(create='[chemistry]') logical :: bodyForces logical :: bf_x, bf_y, bf_z !< body force toggle in three directions @@ -169,27 +169,27 @@ module m_global_parameters #:endfor #:endfor real(wp), dimension(3) :: accel_bf - $:GPU_DECLARE(create=["accel_bf"]) + $:GPU_DECLARE(create='[accel_bf]') integer :: cpu_start, cpu_end, cpu_rate #:if not MFC_CASE_OPTIMIZATION - $:GPU_DECLARE(create=["num_dims","num_vels","weno_polyn","weno_order"]) - $:GPU_DECLARE(create=["weno_num_stencils","num_fluids","wenojs"]) - $:GPU_DECLARE(create=["mapped_weno", "wenoz","teno","wenoz_q","mhd","relativity"]) + $:GPU_DECLARE(create='[num_dims,num_vels,weno_polyn,weno_order]') + $:GPU_DECLARE(create='[weno_num_stencils,num_fluids,wenojs]') + $:GPU_DECLARE(create='[mapped_weno, wenoz,teno,wenoz_q,mhd,relativity]') #:endif - $:GPU_DECLARE(create=["mpp_lim","model_eqns","mixture_err","alt_soundspeed"]) - $:GPU_DECLARE(create=["avg_state","mp_weno","weno_eps","teno_CT","hypoelasticity"]) - $:GPU_DECLARE(create=["hyperelasticity","hyper_model","elasticity","low_Mach"]) - $:GPU_DECLARE(create=["viscous","shear_stress","bulk_stress","cont_damage"]) + $:GPU_DECLARE(create='[mpp_lim,model_eqns,mixture_err,alt_soundspeed]') + $:GPU_DECLARE(create='[avg_state,mp_weno,weno_eps,teno_CT,hypoelasticity]') + $:GPU_DECLARE(create='[hyperelasticity,hyper_model,elasticity,low_Mach]') + $:GPU_DECLARE(create='[viscous,shear_stress,bulk_stress,cont_damage]') logical :: relax !< activate phase change integer :: relax_model !< Relaxation model real(wp) :: palpha_eps !< trigger parameter for the p relaxation procedure, phase change model real(wp) :: ptgalpha_eps !< trigger parameter for the pTg relaxation procedure, phase change model - $:GPU_DECLARE(create=["relax", "relax_model", "palpha_eps","ptgalpha_eps"]) + $:GPU_DECLARE(create='[relax, relax_model, palpha_eps,ptgalpha_eps]') integer :: num_bc_patches logical :: bc_io @@ -199,9 +199,9 @@ module m_global_parameters !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z !> @} - $:GPU_DECLARE(create=["bc_x%vb1", "bc_x%vb2", "bc_x%vb3", "bc_x%ve1", "bc_x%ve2", "bc_x%ve3"]) - $:GPU_DECLARE(create=["bc_y%vb1", "bc_y%vb2", "bc_y%vb3", "bc_y%ve1", "bc_y%ve2", "bc_y%ve3"]) - $:GPU_DECLARE(create=["bc_z%vb1", "bc_z%vb2", "bc_z%vb3", "bc_z%ve1", "bc_z%ve2", "bc_z%ve3"]) + $:GPU_DECLARE(create='[bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3]') + $:GPU_DECLARE(create='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]') + $:GPU_DECLARE(create='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]') type(bounds_info) :: x_domain, y_domain, z_domain real(wp) :: x_a, y_a, z_a @@ -255,20 +255,20 @@ module m_global_parameters integer :: c_idx !< Index of color function integer :: damage_idx !< Index of damage state variable (D) for continuum damage model !> @} - $:GPU_DECLARE(create=["sys_size","E_idx","n_idx","bub_idx","alf_idx","gamma_idx"]) - $:GPU_DECLARE(create=["pi_inf_idx","B_idx","stress_idx","xi_idx","b_size"]) - $:GPU_DECLARE(create=["tensor_size","species_idx","c_idx"]) + $:GPU_DECLARE(create='[sys_size,E_idx,n_idx,bub_idx,alf_idx,gamma_idx]') + $:GPU_DECLARE(create='[pi_inf_idx,B_idx,stress_idx,xi_idx,b_size]') + $:GPU_DECLARE(create='[tensor_size,species_idx,c_idx]') ! Cell Indices for the (local) interior points (O-m, O-n, 0-p). ! Stands for "InDices With INTerior". type(int_bounds_info) :: idwint(1:3) - $:GPU_DECLARE(create=["idwint"]) + $:GPU_DECLARE(create='[idwint]') ! Cell Indices for the entire (local) domain. In simulation and post_process, ! this includes the buffer region. idwbuff and idwint are the same otherwise. ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) - $:GPU_DECLARE(create=["idwbuff"]) + $:GPU_DECLARE(create='[idwbuff]') !> @name The number of fluids, along with their identifying indexes, respectively, !! for which viscous effects, e.g. the shear and/or the volume Reynolds (Re) @@ -278,7 +278,7 @@ module m_global_parameters integer, allocatable, dimension(:, :) :: Re_idx !> @} - $:GPU_DECLARE(create=["Re_size","Re_idx"]) + $:GPU_DECLARE(create='[Re_size,Re_idx]') ! The WENO average (WA) flag regulates whether the calculation of any cell- ! average spatial derivatives is carried out in each cell by utilizing the @@ -289,7 +289,7 @@ module m_global_parameters real(wp) :: wa_flg !> @{ - $:GPU_DECLARE(create=["wa_flg"]) + $:GPU_DECLARE(create='[wa_flg]') !> @name The coordinate direction indexes and flags (flg), respectively, for which !! the configurations will be determined with respect to a working direction @@ -301,14 +301,14 @@ module m_global_parameters integer, dimension(3) :: dir_idx_tau !!used for hypoelasticity=true !> @} - $:GPU_DECLARE(create=["dir_idx","dir_flg","dir_idx_tau"]) + $:GPU_DECLARE(create='[dir_idx,dir_flg,dir_idx_tau]') integer :: buff_size !< !! The number of cells that are necessary to be able to store enough boundary !! conditions data to march the solution in the physical computational domain !! to the next time-step. - $:GPU_DECLARE(create=["buff_size"]) + $:GPU_DECLARE(create='[buff_size]') integer :: shear_num !! Number of shear stress components integer, dimension(3) :: shear_indices !< @@ -319,7 +319,7 @@ module m_global_parameters !! Indices of shear stress components to reflect for boundary conditions. !! Size: (1:3, 1:shear_BC_flip_num) for (x/y/z, [indices]) - $:GPU_DECLARE(create=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) + $:GPU_DECLARE(create='[shear_num,shear_indices,shear_BC_flip_num,shear_BC_flip_indices]') ! END: Simulation Algorithm Parameters @@ -339,7 +339,7 @@ module m_global_parameters !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, !! it is a measure of the half-size of the finite-difference stencil for the !! selected order of accuracy. - $:GPU_DECLARE(create=["fd_order","fd_number"]) + $:GPU_DECLARE(create='[fd_order,fd_number]') logical :: probe_wrt logical :: integral_wrt @@ -352,7 +352,7 @@ module m_global_parameters !> @{ real(wp) :: rhoref, pref !> @} - $:GPU_DECLARE(create=["rhoref","pref"]) + $:GPU_DECLARE(create='[rhoref,pref]') !> @name Immersed Boundaries !> @{ @@ -367,7 +367,7 @@ module m_global_parameters !! the maximum allowable number of patches, num_patches_max, may be changed !! in the module m_derived_types.f90. - $:GPU_DECLARE(create=["ib","num_ibs","patch_ib"]) + $:GPU_DECLARE(create='[ib,num_ibs,patch_ib]') !> @} !> @name Bubble modeling @@ -382,31 +382,31 @@ module m_global_parameters real(wp) :: Ca !< Cavitation number real(wp) :: Web !< Weber number real(wp) :: Re_inv !< Inverse Reynolds number - $:GPU_DECLARE(create=["R0ref","Ca","Web","Re_inv"]) + $:GPU_DECLARE(create='[R0ref,Ca,Web,Re_inv]') real(wp), dimension(:), allocatable :: weight !< Simpson quadrature weights real(wp), dimension(:), allocatable :: R0 !< Bubble sizes real(wp), dimension(:), allocatable :: V0 !< Bubble velocities - $:GPU_DECLARE(create=["weight","R0","V0"]) + $:GPU_DECLARE(create='[weight,R0,V0]') logical :: bubbles_euler !< Bubbles euler on/off logical :: polytropic !< Polytropic switch logical :: polydisperse !< Polydisperse bubbles - $:GPU_DECLARE(create=["bubbles_euler","polytropic","polydisperse"]) + $:GPU_DECLARE(create='[bubbles_euler,polytropic,polydisperse]') logical :: adv_n !< Solve the number density equation and compute alpha from number density logical :: adap_dt !< Adaptive step size control real(wp) :: adap_dt_tol !< Tolerance to control adaptive step size - $:GPU_DECLARE(create=["adv_n","adap_dt","adap_dt_tol"]) + $:GPU_DECLARE(create='[adv_n,adap_dt,adap_dt_tol]') integer :: bubble_model !< Gilmore or Keller--Miksis bubble model integer :: thermal !< Thermal behavior. 1 = adiabatic, 2 = isotherm, 3 = transfer - $:GPU_DECLARE(create=["bubble_model","thermal"]) + $:GPU_DECLARE(create='[bubble_model,thermal]') real(wp), allocatable, dimension(:, :, :) :: ptil !< Pressure modification real(wp) :: poly_sigma !< log normal sigma for polydisperse PDF - $:GPU_DECLARE(create=["ptil", "poly_sigma"]) + $:GPU_DECLARE(create='[ptil, poly_sigma]') logical :: qbmm !< Quadrature moment method integer, parameter :: nmom = 6 !< Number of carried moments per R0 location @@ -415,39 +415,39 @@ module m_global_parameters integer :: R0_type real(wp) :: pi_fac !< Factor for artificial pi_inf - $:GPU_DECLARE(create=["qbmm", "nmomsp","nmomtot","R0_type","pi_fac"]) + $:GPU_DECLARE(create='[qbmm, nmomsp,nmomtot,R0_type,pi_fac]') #:if not MFC_CASE_OPTIMIZATION - $:GPU_DECLARE(create=["nb"]) + $:GPU_DECLARE(create='[nb]') #:endif type(scalar_field), allocatable, dimension(:) :: mom_sp type(scalar_field), allocatable, dimension(:, :, :) :: mom_3d - $:GPU_DECLARE(create=["mom_sp","mom_3d"]) + $:GPU_DECLARE(create='[mom_sp,mom_3d]') !> @} type(chemistry_parameters) :: chem_params - $:GPU_DECLARE(create=["chem_params"]) + $:GPU_DECLARE(create='[chem_params]') !> @name Physical bubble parameters (see Ando 2010, Preston 2007) !> @{ real(wp) :: R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_vl, k_nl, cp_n, cp_v - $:GPU_DECLARE(create=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw",]) - $:GPU_DECLARE(create=["pv","M_n", "M_v","k_vl","k_nl","cp_n","cp_v"]) + $:GPU_DECLARE(create='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw]') + $:GPU_DECLARE(create='[pv,M_n, M_v,k_vl,k_nl,cp_n,cp_v]') real(wp), dimension(:), allocatable :: k_n, k_v, pb0, mass_n0, mass_v0, Pe_T real(wp), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN - $:GPU_DECLARE(create=["k_n","k_v","pb0","mass_n0","mass_v0","Pe_T"]) - $:GPU_DECLARE(create=["Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN"]) + $:GPU_DECLARE(create='[k_n,k_v,pb0,mass_n0,mass_v0,Pe_T]') + $:GPU_DECLARE(create='[Re_trans_T,Re_trans_c,Im_trans_T,Im_trans_c,omegaN]') real(wp) :: mul0, ss, gamma_v, mu_v real(wp) :: gamma_m, gamma_n, mu_n real(wp) :: gam !> @} - $:GPU_DECLARE(create=["mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) + $:GPU_DECLARE(create='[mul0,ss,gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]') !> @name Acoustic acoustic_source parameters !> @{ @@ -455,14 +455,14 @@ module m_global_parameters type(acoustic_parameters), dimension(num_probes_max) :: acoustic !< Acoustic source parameters integer :: num_source !< Number of acoustic sources !> @} - $:GPU_DECLARE(create=["acoustic_source","acoustic","num_source"]) + $:GPU_DECLARE(create='[acoustic_source,acoustic,num_source]') !> @name Surface tension parameters !> @{ real(wp) :: sigma logical :: surface_tension - $:GPU_DECLARE(create=["sigma","surface_tension"]) + $:GPU_DECLARE(create='[sigma,surface_tension]') !> @} integer :: momxb, momxe @@ -473,13 +473,13 @@ module m_global_parameters integer :: strxb, strxe integer :: chemxb, chemxe integer :: xibeg, xiend - $:GPU_DECLARE(create=["momxb","momxe","advxb","advxe","contxb","contxe"]) - $:GPU_DECLARE(create=["intxb","intxe", "bubxb","bubxe"]) - $:GPU_DECLARE(create=["strxb","strxe","chemxb","chemxe"]) - $:GPU_DECLARE(create=["xibeg","xiend"]) + $:GPU_DECLARE(create='[momxb,momxe,advxb,advxe,contxb,contxe]') + $:GPU_DECLARE(create='[intxb,intxe, bubxb,bubxe]') + $:GPU_DECLARE(create='[strxb,strxe,chemxb,chemxe]') + $:GPU_DECLARE(create='[xibeg,xiend]') real(wp), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps - $:GPU_DECLARE(create=["gammas","gs_min","pi_infs","ps_inf","cvs","qvs","qvps"]) + $:GPU_DECLARE(create='[gammas,gs_min,pi_infs,ps_inf,cvs,qvs,qvps]') real(wp) :: mytime !< Current simulation time real(wp) :: finaltime !< Final simulation time @@ -490,25 +490,25 @@ module m_global_parameters type(pres_field), allocatable, dimension(:) :: mv_ts - $:GPU_DECLARE(create=["pb_ts","mv_ts"]) + $:GPU_DECLARE(create='[pb_ts,mv_ts]') !> @name lagrangian subgrid bubble parameters !> @{! logical :: bubbles_lagrange !< Lagrangian subgrid bubble model switch type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters - $:GPU_DECLARE(create=["bubbles_lagrange","lag_params"]) + $:GPU_DECLARE(create='[bubbles_lagrange,lag_params]') !> @} real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) logical :: powell !< Powell‐correction for div B = 0 - $:GPU_DECLARE(create=["Bx0","powell"]) + $:GPU_DECLARE(create='[Bx0,powell]') !> @name Continuum damage model parameters !> @{! real(wp) :: tau_star !< Stress threshold for continuum damage modeling real(wp) :: cont_damage_s !< Exponent s for continuum damage modeling real(wp) :: alpha_bar !< Damage rate factor for continuum damage modeling - $:GPU_DECLARE(create=["tau_star","cont_damage_s","alpha_bar"]) + $:GPU_DECLARE(create='[tau_star,cont_damage_s,alpha_bar]') !> @} contains @@ -801,10 +801,10 @@ contains else weno_num_stencils = weno_polyn end if - $:GPU_UPDATE(device=["weno_polyn"]) - $:GPU_UPDATE(device=["weno_num_stencils"]) - $:GPU_UPDATE(device=["nb"]) - $:GPU_UPDATE(device=["num_dims","num_vels","num_fluids"]) + $:GPU_UPDATE(device='[weno_polyn]') + $:GPU_UPDATE(device='[weno_num_stencils]') + $:GPU_UPDATE(device='[nb]') + $:GPU_UPDATE(device='[num_dims,num_vels,num_fluids]') #:endif ! Initializing the number of fluids for which viscous effects will @@ -1040,7 +1040,7 @@ contains if (Re_size(1) > 0._wp) shear_stress = .true. if (Re_size(2) > 0._wp) bulk_stress = .true. - $:GPU_UPDATE(device=["Re_size","viscous","shear_stress","bulk_stress"]) + $:GPU_UPDATE(device='[Re_size,viscous,shear_stress,bulk_stress]') ! Bookkeeping the indexes of any viscous fluids and any pairs of ! fluids whose interface will support effects of surface tension @@ -1096,7 +1096,7 @@ contains ! y-dir: flip tau_xy and tau_yz ! z-dir: flip tau_xz and tau_yz end if - $:GPU_UPDATE(device=["shear_num","shear_indices","shear_BC_flip_num","shear_BC_flip_indices"]) + $:GPU_UPDATE(device='[shear_num,shear_indices,shear_BC_flip_num,shear_BC_flip_indices]') end if if (hyperelasticity) then @@ -1163,7 +1163,7 @@ contains ! cell-boundary values or otherwise, the unaltered left and right, ! WENO-reconstructed, cell-boundary values wa_flg = 0._wp; if (weno_avg) wa_flg = 1._wp - $:GPU_UPDATE(device=["wa_flg"]) + $:GPU_UPDATE(device='[wa_flg]') ! Resort to default WENO-JS if no other WENO scheme is selected #:if not MFC_CASE_OPTIMIZATION @@ -1173,7 +1173,7 @@ contains if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) Np = 0 - $:GPU_UPDATE(device=["Re_size"]) + $:GPU_UPDATE(device='[Re_size]') if (elasticity) then fd_number = max(1, fd_order/2) @@ -1191,7 +1191,7 @@ contains idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & num_dims) - $:GPU_UPDATE(device=["idwint", "idwbuff"]) + $:GPU_UPDATE(device='[idwint, idwbuff]') ! Configuring Coordinate Direction Indexes if (bubbles_euler) then @@ -1201,7 +1201,7 @@ contains & idwbuff(3)%beg:idwbuff(3)%end)) end if - $:GPU_UPDATE(device=["fd_order", "fd_number"]) + $:GPU_UPDATE(device='[fd_order, fd_number]') if (cyl_coord .neqv. .true.) then ! Cartesian grid grid_geometry = 1 @@ -1228,44 +1228,44 @@ contains chemxb = species_idx%beg chemxe = species_idx%end - $:GPU_UPDATE(device=["momxb","momxe","advxb","advxe","contxb","contxe", & - & "bubxb","bubxe","intxb","intxe","sys_size","buff_size","E_idx", & - & "alf_idx","n_idx","adv_n","adap_dt","pi_fac","strxb","strxe", & - & "chemxb","chemxe","c_idx"]) - $:GPU_UPDATE(device=["b_size","xibeg","xiend","tensor_size"]) + $:GPU_UPDATE(device='[momxb,momxe,advxb,advxe,contxb,contxe, & + & bubxb,bubxe,intxb,intxe,sys_size,buff_size,E_idx, & + & alf_idx,n_idx,adv_n,adap_dt,pi_fac,strxb,strxe, & + & chemxb,chemxe,c_idx]') + $:GPU_UPDATE(device='[b_size,xibeg,xiend,tensor_size]') - $:GPU_UPDATE(device=["species_idx"]) - $:GPU_UPDATE(device=["cfl_target","m","n","p"]) + $:GPU_UPDATE(device='[species_idx]') + $:GPU_UPDATE(device='[cfl_target,m,n,p]') - $:GPU_UPDATE(device=["alt_soundspeed","acoustic_source","num_source"]) - $:GPU_UPDATE(device=["dt","sys_size","buff_size","pref","rhoref", & - & "gamma_idx","pi_inf_idx","E_idx","alf_idx","stress_idx", & - & "mpp_lim","bubbles_euler","hypoelasticity","alt_soundspeed", & - & "avg_state","num_fluids","model_eqns","num_dims","num_vels", & - & "mixture_err","grid_geometry","cyl_coord","mp_weno","weno_eps", & - & "teno_CT","hyperelasticity","hyper_model","elasticity","xi_idx", & - & "B_idx","low_Mach"]) + $:GPU_UPDATE(device='[alt_soundspeed,acoustic_source,num_source]') + $:GPU_UPDATE(device='[dt,sys_size,buff_size,pref,rhoref, & + & gamma_idx,pi_inf_idx,E_idx,alf_idx,stress_idx, & + & mpp_lim,bubbles_euler,hypoelasticity,alt_soundspeed, & + & avg_state,num_fluids,model_eqns,num_dims,num_vels, & + & mixture_err,grid_geometry,cyl_coord,mp_weno,weno_eps, & + & teno_CT,hyperelasticity,hyper_model,elasticity,xi_idx, & + & B_idx,low_Mach]') - $:GPU_UPDATE(device=["Bx0", "powell"]) + $:GPU_UPDATE(device='[Bx0, powell]') - $:GPU_UPDATE(device=["cont_damage","tau_star","cont_damage_s","alpha_bar"]) + $:GPU_UPDATE(device='[cont_damage,tau_star,cont_damage_s,alpha_bar]') #:if not MFC_CASE_OPTIMIZATION - $:GPU_UPDATE(device=["wenojs","mapped_weno","wenoz","teno"]) - $:GPU_UPDATE(device=["wenoz_q"]) - $:GPU_UPDATE(device=["mhd", "relativity"]) + $:GPU_UPDATE(device='[wenojs,mapped_weno,wenoz,teno]') + $:GPU_UPDATE(device='[wenoz_q]') + $:GPU_UPDATE(device='[mhd, relativity]') #:endif - $:GPU_ENTER_DATA(copyin=["nb","R0ref","Ca","Web","Re_inv","weight","R0", & - & "V0","bubbles_euler","polytropic","polydisperse","qbmm","R0_type", & - & "ptil","bubble_model","thermal","poly_sigma"]) - $:GPU_ENTER_DATA(copyin=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv", & - & "M_n","M_v","k_n","k_v","pb0","mass_n0","mass_v0","Pe_T", & - & "Re_trans_T","Re_trans_c","Im_trans_T","Im_trans_c","omegaN", & - & "mul0","ss","gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) - $:GPU_ENTER_DATA(copyin=["dir_idx","dir_flg","dir_idx_tau"]) + $:GPU_ENTER_DATA(copyin='[nb,R0ref,Ca,Web,Re_inv,weight,R0, & + & V0,bubbles_euler,polytropic,polydisperse,qbmm,R0_type, & + & ptil,bubble_model,thermal,poly_sigma]') + $:GPU_ENTER_DATA(copyin='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw,pv, & + & M_n,M_v,k_n,k_v,pb0,mass_n0,mass_v0,Pe_T, & + & Re_trans_T,Re_trans_c,Im_trans_T,Im_trans_c,omegaN, & + & mul0,ss,gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]') + $:GPU_ENTER_DATA(copyin='[dir_idx,dir_flg,dir_idx_tau]') - $:GPU_ENTER_DATA(copyin=["relax","relax_model","palpha_eps","ptgalpha_eps"]) + $:GPU_ENTER_DATA(copyin='[relax,relax_model,palpha_eps,ptgalpha_eps]') ! Allocating grid variables for the x-, y- and z-directions @:ALLOCATE(x_cb(-1 - buff_size:m + buff_size)) diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index ad888c813d..6c11405c86 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -26,14 +26,14 @@ module m_hyperelastic !! The btensor at the cell-interior Gaussian quadrature points. !! These tensor is needed to be calculated once and make the code DRY. type(vector_field) :: btensor !< - $:GPU_DECLARE(create=["btensor"]) + $:GPU_DECLARE(create='[btensor]') real(wp), allocatable, dimension(:, :) :: fd_coeff_x real(wp), allocatable, dimension(:, :) :: fd_coeff_y real(wp), allocatable, dimension(:, :) :: fd_coeff_z - $:GPU_DECLARE(create=["fd_coeff_x","fd_coeff_y", "fd_coeff_z"]) + $:GPU_DECLARE(create='[fd_coeff_x,fd_coeff_y, fd_coeff_z]') real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create=["Gs"]) + $:GPU_DECLARE(create='[Gs]') contains @@ -59,7 +59,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device=["Gs"]) + $:GPU_UPDATE(device='[Gs]') @:ALLOCATE(fd_coeff_x(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -72,16 +72,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_x"]) + $:GPU_UPDATE(device='[fd_coeff_x]') if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_y"]) + $:GPU_UPDATE(device='[fd_coeff_y]') end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_z"]) + $:GPU_UPDATE(device='[fd_coeff_z]') end if end subroutine s_initialize_hyperelastic_module @@ -106,8 +106,8 @@ contains real(wp) :: G integer :: j, k, l, i, r - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_K", "alpha_rho_K", "rho", & - "gamma", "pi_inf", "qv", "G", "Re", "tensora", "tensorb"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, & + & gamma, pi_inf, qv, G, Re, tensora, tensorb]') do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index b2b5d7a859..27c34a090c 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -20,20 +20,20 @@ module m_hypoelastic s_compute_damage_state real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create=["Gs"]) + $:GPU_DECLARE(create='[Gs]') real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:GPU_DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) + $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]') real(wp), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field - $:GPU_DECLARE(create=["rho_K_field","G_K_field"]) + $:GPU_DECLARE(create='[rho_K_field,G_K_field]') real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - $:GPU_DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) + $:GPU_DECLARE(create='[fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h]') contains @@ -55,7 +55,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device=["Gs"]) + $:GPU_UPDATE(device='[Gs]') @:ALLOCATE(fd_coeff_x_h(-fd_number:fd_number, 0:m)) if (n > 0) then @@ -68,16 +68,16 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_x_h"]) + $:GPU_UPDATE(device='[fd_coeff_x_h]') if (n > 0) then call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_y_h"]) + $:GPU_UPDATE(device='[fd_coeff_y_h]') end if if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, & fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_z_h"]) + $:GPU_UPDATE(device='[fd_coeff_z_h]') end if end subroutine s_initialize_hypoelastic_module diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 7adad082ca..1290b3911a 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -35,15 +35,15 @@ module m_ibm type(integer_field), public :: ib_markers type(levelset_field), public :: levelset type(levelset_norm_field), public :: levelset_norm - $:GPU_DECLARE(create=["ib_markers","levelset","levelset_norm"]) + $:GPU_DECLARE(create='[ib_markers,levelset,levelset_norm]') type(ghost_point), dimension(:), allocatable :: ghost_points type(ghost_point), dimension(:), allocatable :: inner_points - $:GPU_DECLARE(create=["ghost_points","inner_points"]) + $:GPU_DECLARE(create='[ghost_points,inner_points]') integer :: num_gps !< Number of ghost points integer :: num_inner_gps !< Number of ghost points - $:GPU_DECLARE(create=["gp_layers","num_gps","num_inner_gps"]) + $:GPU_DECLARE(create='[gp_layers,num_gps,num_inner_gps]') contains @@ -70,7 +70,7 @@ contains @:ACC_SETUP_SFs(levelset) @:ACC_SETUP_SFs(levelset_norm) - $:GPU_ENTER_DATA(copyin=["num_gps","num_inner_gps"]) + $:GPU_ENTER_DATA(copyin='[num_gps,num_inner_gps]') end subroutine s_initialize_ibm_module @@ -80,31 +80,31 @@ contains integer :: i, j, k - $:GPU_UPDATE(device=["ib_markers%sf"]) - $:GPU_UPDATE(device=["levelset%sf"]) - $:GPU_UPDATE(device=["levelset_norm%sf"]) + $:GPU_UPDATE(device='[ib_markers%sf]') + $:GPU_UPDATE(device='[levelset%sf]') + $:GPU_UPDATE(device='[levelset_norm%sf]') ! Get neighboring IB variables from other processors call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) - $:GPU_UPDATE(host=["ib_markers%sf"]) + $:GPU_UPDATE(host='[ib_markers%sf]') call s_find_num_ghost_points(num_gps, num_inner_gps) - $:GPU_UPDATE(device=["num_gps", "num_inner_gps"]) + $:GPU_UPDATE(device='[num_gps, num_inner_gps]') @:ALLOCATE(ghost_points(1:num_gps)) @:ALLOCATE(inner_points(1:num_inner_gps)) - $:GPU_ENTER_DATA(copyin=["ghost_points","inner_points"]) + $:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]') call s_find_ghost_points(ghost_points, inner_points) - $:GPU_UPDATE(device=["ghost_points", "inner_points"]) + $:GPU_UPDATE(device='[ghost_points, inner_points]') call s_compute_image_points(ghost_points, levelset, levelset_norm) - $:GPU_UPDATE(device=["ghost_points"]) + $:GPU_UPDATE(device='[ghost_points]') call s_compute_interpolation_coeffs(ghost_points) - $:GPU_UPDATE(device=["ghost_points"]) + $:GPU_UPDATE(device='[ghost_points]') end subroutine s_ibm_setup @@ -152,11 +152,11 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - $:GPU_PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & - & "alpha_IP","pres_IP","vel_IP","vel_g","vel_norm_IP","r_IP", & - & "v_IP","pb_IP","mv_IP","nmom_IP","presb_IP","massv_IP","rho", & - & "gamma","pi_inf","Re_K","G_K","Gs","gp","innerp","norm","buf", & - & "j","k","l","q","coeff"]) + $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, & + & alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, & + & v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, & + & gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, & + & j,k,l,q,coeff]') do i = 1, num_gps gp = ghost_points(i) @@ -300,9 +300,9 @@ contains end do !Correct the state of the inner points in IBs - $:GPU_PARALLEL_LOOP(private=["physical_loc","dyn_pres","alpha_rho_IP", & - & "alpha_IP","vel_g","rho","gamma","pi_inf","Re_K","innerp", & - & "j","k","l","q"]) + $:GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, & + & alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp, & + & j,k,l,q]') do i = 1, num_inner_gps vel_g = 0._wp diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 4f76567423..b17ec69271 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -24,12 +24,12 @@ module m_mhd real(wp), allocatable, dimension(:, :, :) :: du_dx, du_dy, du_dz real(wp), allocatable, dimension(:, :, :) :: dv_dx, dv_dy, dv_dz real(wp), allocatable, dimension(:, :, :) :: dw_dx, dw_dy, dw_dz - $:GPU_DECLARE(create=["du_dx","du_dy","du_dz","dv_dx","dv_dy","dv_dz","dw_dx","dw_dy","dw_dz"]) + $:GPU_DECLARE(create='[du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz]') real(wp), allocatable, dimension(:, :) :: fd_coeff_x_h real(wp), allocatable, dimension(:, :) :: fd_coeff_y_h real(wp), allocatable, dimension(:, :) :: fd_coeff_z_h - $:GPU_DECLARE(create=["fd_coeff_x_h","fd_coeff_y_h","fd_coeff_z_h"]) + $:GPU_DECLARE(create='[fd_coeff_x_h,fd_coeff_y_h,fd_coeff_z_h]') contains @@ -52,12 +52,12 @@ contains ! Computing centered finite difference coefficients call s_compute_finite_difference_coefficients(m, x_cc, fd_coeff_x_h, buff_size, fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_x_h"]) + $:GPU_UPDATE(device='[fd_coeff_x_h]') call s_compute_finite_difference_coefficients(n, y_cc, fd_coeff_y_h, buff_size, fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_y_h"]) + $:GPU_UPDATE(device='[fd_coeff_y_h]') if (p > 0) then call s_compute_finite_difference_coefficients(p, z_cc, fd_coeff_z_h, buff_size, fd_number, fd_order) - $:GPU_UPDATE(device=["fd_coeff_z_h"]) + $:GPU_UPDATE(device='[fd_coeff_z_h]') end if end subroutine s_initialize_mhd_powell_module @@ -76,7 +76,7 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - $:GPU_PARALLEL_LOOP(collapse=3, private=["v", "B"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') do q = 0, p do l = 0, n do k = 0, m diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 4ec4c463f5..bc8e75538c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -46,7 +46,7 @@ module m_mpi_proxy !> @{ integer, private :: err_code, ierr, v_size !> @} - $:GPU_DECLARE(create=["v_size"]) + $:GPU_DECLARE(create='[v_size]') contains @@ -798,7 +798,7 @@ contains if (bc_x%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_x%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -834,7 +834,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send","ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send,ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -853,7 +853,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_x%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -888,7 +888,7 @@ contains !$acc wait else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -908,12 +908,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if #endif ! Unpacking buffer received from bc_x%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -929,7 +929,7 @@ contains if (bc_x%beg >= 0) then ! PBC at the end and beginning - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') ! Packing buffer to be sent to bc_x%beg do l = 0, p do k = 0, n @@ -966,7 +966,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') call MPI_SENDRECV( & ib_buff_send(0), & gp_layers*(n + 1)*(p + 1), & @@ -983,7 +983,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_x%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = m - gp_layers + 1, m @@ -1019,7 +1019,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') call MPI_SENDRECV( & ib_buff_send(0), & @@ -1037,11 +1037,11 @@ contains end if if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if ! Unpacking buffer received from bc_x%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = m + 1, m + gp_layers @@ -1061,7 +1061,7 @@ contains if (bc_y%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_y%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1098,7 +1098,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1117,7 +1117,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_y%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1154,7 +1154,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1174,12 +1174,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if #endif ! Unpacking buffer received from bc_y%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -1197,7 +1197,7 @@ contains if (bc_y%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_y%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -1234,7 +1234,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1253,7 +1253,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_y%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = n - gp_layers + 1, n do j = -gp_layers, m + gp_layers @@ -1290,7 +1290,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1310,12 +1310,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if #endif ! Unpacking buffer received form bc_y%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = n + 1, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1335,7 +1335,7 @@ contains if (bc_z%end >= 0) then ! PBC at the beginning and end ! Packing buffer to be sent to bc_z%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1373,7 +1373,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1392,7 +1392,7 @@ contains else ! PBC at the beginning only ! Packing buffer to be sent to bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1429,7 +1429,7 @@ contains else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1449,12 +1449,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if #endif ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1473,7 +1473,7 @@ contains if (bc_z%beg >= 0) then ! PBC at the end and beginning ! Packing buffer to be sent to bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1509,7 +1509,7 @@ contains !$acc wait else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1528,7 +1528,7 @@ contains else ! PBC at the end only ! Packing buffer to be sent to bc_z%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = p - gp_layers + 1, p do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -1565,7 +1565,7 @@ contains !$acc wait else #endif - $:GPU_UPDATE(host=["ib_buff_send"]) + $:GPU_UPDATE(host='[ib_buff_send]') ! Send/receive buffer to/from bc_x%end/bc_x%beg call MPI_SENDRECV( & @@ -1585,12 +1585,12 @@ contains #if defined(MFC_OpenACC) if (rdma_mpi .eqv. .false.) then - $:GPU_UPDATE(device=["ib_buff_recv"]) + $:GPU_UPDATE(device='[ib_buff_recv]') end if #endif ! Unpacking buffer received from bc_z%end - $:GPU_PARALLEL_LOOP(collapse=3,private=["r"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = p + 1, p + gp_layers do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 40e56ebf9d..93361bf0a5 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -25,21 +25,21 @@ module m_qbmm private; public :: s_initialize_qbmm_module, s_mom_inv, s_coeff, s_compute_qbmm_rhs real(wp), allocatable, dimension(:, :, :, :, :) :: momrhs - $:GPU_DECLARE(create=["momrhs"]) + $:GPU_DECLARE(create='[momrhs]') #:if MFC_CASE_OPTIMIZATION integer, parameter :: nterms = ${nterms}$ #:else integer :: nterms - $:GPU_DECLARE(create=["nterms"]) + $:GPU_DECLARE(create='[nterms]') #:endif type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm - $:GPU_DECLARE(create=["is1_qbmm","is2_qbmm","is3_qbmm"]) + $:GPU_DECLARE(create='[is1_qbmm,is2_qbmm,is3_qbmm]') integer, allocatable, dimension(:) :: bubrs integer, allocatable, dimension(:, :) :: bubmoms - $:GPU_DECLARE(create=["bubrs","bubmoms"]) + $:GPU_DECLARE(create='[bubrs,bubmoms]') contains @@ -57,8 +57,8 @@ contains nterms = 7 end if - $:GPU_ENTER_DATA(copyin=["nterms"]) - $:GPU_UPDATE(device=["nterms"]) + $:GPU_ENTER_DATA(copyin='[nterms]') + $:GPU_UPDATE(device='[nterms]') #:endif @@ -392,7 +392,7 @@ contains end do end if - $:GPU_UPDATE(device=["momrhs"]) + $:GPU_UPDATE(device='[momrhs]') @:ALLOCATE(bubrs(1:nb)) @:ALLOCATE(bubmoms(1:nb, 1:nmom)) @@ -400,14 +400,14 @@ contains do i = 1, nb bubrs(i) = bub_idx%rs(i) end do - $:GPU_UPDATE(device=["bubrs"]) + $:GPU_UPDATE(device='[bubrs]') do j = 1, nmom do i = 1, nb bubmoms(i, j) = bub_idx%moms(i, j) end do end do - $:GPU_UPDATE(device=["bubmoms"]) + $:GPU_UPDATE(device='[bubmoms]') end subroutine s_initialize_qbmm_module @@ -433,7 +433,7 @@ contains end select if (.not. polytropic) then - $:GPU_PARALLEL_LOOP(collapse=5,private=["nb_q","nR","nR2","R","R2","nb_dot","nR_dot","nR2_dot","var","AX"]) + $:GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') do i = 1, nb do q = 1, nnode do l = 0, p @@ -715,12 +715,12 @@ contains integer :: id1, id2, id3, i1, i2, j, q, r is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz - $:GPU_UPDATE(device=["is1_qbmm","is2_qbmm","is3_qbmm"]) + $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') - $:GPU_PARALLEL_LOOP(collapse=3, private=["moms", "msum", "wght", "abscX", & - "abscY", "wght_pb", "wght_mv", "wght_ht", "coeff", "ht", "r", "q", & - "n_tait", "B_tait", "pres", "rho", "nbub", "c", "alf", "momsum", & - "drdt", "drdt2", "chi_vw", "x_vw", "rho_mw", "k_mw", "T_bar", "grad_T"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, & + & abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, & + & n_tait, B_tait, pres, rho, nbub, c, alf, momsum, & + & drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') do id3 = is3_qbmm%beg, is3_qbmm%end do id2 = is2_qbmm%beg, is2_qbmm%end do id1 = is1_qbmm%beg, is1_qbmm%end diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 416f9ca5bc..f76c81f4ed 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -75,13 +75,13 @@ module m_rhs !! conservative variables, which are located in q_cons_vf, at cell-interior !! Gaussian quadrature points (QP). type(vector_field) :: q_cons_qp !< - $:GPU_DECLARE(create=["q_cons_qp"]) + $:GPU_DECLARE(create='[q_cons_qp]') !! The primitive variables at cell-interior Gaussian quadrature points. These !! are calculated from the conservative variables and gradient magnitude (GM) !! of the volume fractions, q_cons_qp and gm_alpha_qp, respectively. type(vector_field) :: q_prim_qp !< - $:GPU_DECLARE(create=["q_prim_qp"]) + $:GPU_DECLARE(create='[q_prim_qp]') !> @name The first-order spatial derivatives of the primitive variables at cell- !! interior Gaussian quadrature points. These are WENO-reconstructed from @@ -90,7 +90,7 @@ module m_rhs !! of the primitive variables, located in qK_prim_n, where K = L or R. !> @{ type(vector_field), allocatable, dimension(:) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp - $:GPU_DECLARE(create=["dq_prim_dx_qp","dq_prim_dy_qp","dq_prim_dz_qp"]) + $:GPU_DECLARE(create='[dq_prim_dx_qp,dq_prim_dy_qp,dq_prim_dz_qp]') !> @} !> @name The left and right WENO-reconstructed cell-boundary values of the cell- @@ -100,26 +100,26 @@ module m_rhs !> @{ type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n - $:GPU_DECLARE(create=["dqL_prim_dx_n","dqL_prim_dy_n","dqL_prim_dz_n"]) - $:GPU_DECLARE(create=["dqR_prim_dx_n","dqR_prim_dy_n","dqR_prim_dz_n"]) + $:GPU_DECLARE(create='[dqL_prim_dx_n,dqL_prim_dy_n,dqL_prim_dz_n]') + $:GPU_DECLARE(create='[dqR_prim_dx_n,dqR_prim_dy_n,dqR_prim_dz_n]') !> @} type(scalar_field), allocatable, dimension(:) :: tau_Re_vf - $:GPU_DECLARE(create=["tau_Re_vf"]) + $:GPU_DECLARE(create='[tau_Re_vf]') type(vector_field) :: gm_alpha_qp !< !! The gradient magnitude of the volume fractions at cell-interior Gaussian !! quadrature points. gm_alpha_qp is calculated from individual first-order !! spatial derivatives located in dq_prim_ds_qp. - $:GPU_DECLARE(create=["gm_alpha_qp"]) + $:GPU_DECLARE(create='[gm_alpha_qp]') !> @name The left and right WENO-reconstructed cell-boundary values of the cell- !! average gradient magnitude of volume fractions, located in gm_alpha_qp. !> @{ type(vector_field), allocatable, dimension(:) :: gm_alphaL_n type(vector_field), allocatable, dimension(:) :: gm_alphaR_n - $:GPU_DECLARE(create=["gm_alphaL_n","gm_alphaR_n"]) + $:GPU_DECLARE(create='[gm_alphaL_n,gm_alphaR_n]') !> @} !> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical @@ -129,38 +129,38 @@ module m_rhs type(vector_field), allocatable, dimension(:) :: flux_n type(vector_field), allocatable, dimension(:) :: flux_src_n type(vector_field), allocatable, dimension(:) :: flux_gsrc_n - $:GPU_DECLARE(create=["flux_n","flux_src_n","flux_gsrc_n"]) + $:GPU_DECLARE(create='[flux_n,flux_src_n,flux_gsrc_n]') !> @} type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim - $:GPU_DECLARE(create=["qL_prim","qR_prim"]) + $:GPU_DECLARE(create='[qL_prim,qR_prim]') type(int_bounds_info) :: iv !< Vector field indical bounds - $:GPU_DECLARE(create=["iv"]) + $:GPU_DECLARE(create='[iv]') !> @name Indical bounds in the x-, y- and z-directions !> @{ type(int_bounds_info) :: irx, iry, irz - $:GPU_DECLARE(create=["irx","iry","irz"]) + $:GPU_DECLARE(create='[irx,iry,irz]') type(int_bounds_info) :: is1, is2, is3 - $:GPU_DECLARE(create=["is1","is2","is3"]) + $:GPU_DECLARE(create='[is1,is2,is3]') !> @name Saved fluxes for testing !> @{ type(scalar_field) :: alf_sum !> @} - $:GPU_DECLARE(create=["alf_sum"]) + $:GPU_DECLARE(create='[alf_sum]') real(wp), allocatable, dimension(:, :, :) :: blkmod1, blkmod2, alpha1, alpha2, Kterm real(wp), allocatable, dimension(:, :, :, :) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf real(wp), allocatable, dimension(:, :, :, :) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf - $:GPU_DECLARE(create=["blkmod1","blkmod2","alpha1","alpha2","Kterm"]) - $:GPU_DECLARE(create=["qL_rsx_vf","qL_rsy_vf","qL_rsz_vf","qR_rsx_vf","qR_rsy_vf","qR_rsz_vf"]) - $:GPU_DECLARE(create=["dqL_rsx_vf","dqL_rsy_vf","dqL_rsz_vf","dqR_rsx_vf","dqR_rsy_vf","dqR_rsz_vf"]) + $:GPU_DECLARE(create='[blkmod1,blkmod2,alpha1,alpha2,Kterm]') + $:GPU_DECLARE(create='[qL_rsx_vf,qL_rsy_vf,qL_rsz_vf,qR_rsx_vf,qR_rsy_vf,qR_rsz_vf]') + $:GPU_DECLARE(create='[dqL_rsx_vf,dqL_rsy_vf,dqL_rsz_vf,dqR_rsx_vf,dqR_rsy_vf,dqR_rsz_vf]') real(wp), allocatable, dimension(:, :, :) :: nbub !< Bubble number density - $:GPU_DECLARE(create=["nbub"]) + $:GPU_DECLARE(create='[nbub]') contains @@ -173,8 +173,8 @@ contains integer :: num_eqns_after_adv - $:GPU_ENTER_DATA(copyin=["idwbuff","idwbuff"]) - $:GPU_UPDATE(device=["idwbuff", "idwbuff"]) + $:GPU_ENTER_DATA(copyin='[idwbuff,idwbuff]') + $:GPU_UPDATE(device='[idwbuff, idwbuff]') @:ALLOCATE(q_cons_qp%vf(1:sys_size)) @:ALLOCATE(q_prim_qp%vf(1:sys_size)) @@ -201,29 +201,29 @@ contains @:ALLOCATE(q_prim_qp%vf(l)%sf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end)) else q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf - $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(l)%sf"]) - $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(l)%sf"]) + $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(l)%sf]') + $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(l)%sf]') end if end do do l = adv_idx%beg, adv_idx%end q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf - $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(l)%sf"]) - $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(l)%sf"]) + $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(l)%sf]') + $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(l)%sf]') end do if (surface_tension) then q_prim_qp%vf(c_idx)%sf => & q_cons_qp%vf(c_idx)%sf - $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(c_idx)%sf"]) - $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(c_idx)%sf"]) + $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(c_idx)%sf]') + $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(c_idx)%sf]') end if if (cont_damage) then q_prim_qp%vf(damage_idx)%sf => & q_cons_qp%vf(damage_idx)%sf - $:GPU_ENTER_DATA(copyin=["q_prim_qp%vf(damage_idx)%sf"]) - $:GPU_ENTER_DATA(attach=["q_prim_qp%vf(damage_idx)%sf"]) + $:GPU_ENTER_DATA(copyin='[q_prim_qp%vf(damage_idx)%sf]') + $:GPU_ENTER_DATA(attach='[q_prim_qp%vf(damage_idx)%sf]') end if if (viscous) then @@ -548,14 +548,14 @@ contains if (riemann_solver /= 1 .and. riemann_solver /= 4) then do l = adv_idx%beg + 1, adv_idx%end flux_src_n(i)%vf(l)%sf => flux_src_n(i)%vf(adv_idx%beg)%sf - $:GPU_ENTER_DATA(attach=["flux_src_n(i)%vf(l)%sf"]) + $:GPU_ENTER_DATA(attach='[flux_src_n(i)%vf(l)%sf]') end do end if else do l = 1, sys_size flux_n(i)%vf(l)%sf => flux_n(1)%vf(l)%sf flux_src_n(i)%vf(l)%sf => flux_src_n(1)%vf(l)%sf - $:GPU_ENTER_DATA(attach=["flux_n(i)%vf(l)%sf","flux_src_n(i)%vf(l)%sf"]) + $:GPU_ENTER_DATA(attach='[flux_n(i)%vf(l)%sf,flux_src_n(i)%vf(l)%sf]') end do end if end do @@ -978,7 +978,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do q_loop = 0, p do l_loop = 0, n @@ -993,8 +993,8 @@ contains end do if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & - & "pressure_val","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & + & pressure_val,flux_face1,flux_face2]') do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1023,7 +1023,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1038,8 +1038,8 @@ contains end do if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & - & "pressure_val","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & + & pressure_val,flux_face1,flux_face2]') do l = 0, p do k = 0, n do q = 0, m @@ -1064,7 +1064,7 @@ contains end if if (cyl_coord) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1090,8 +1090,8 @@ contains end if if (grid_geometry == 3) then ! Cylindrical Coordinates - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","velocity_val", & - & "flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val, & + & flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1106,7 +1106,7 @@ contains end do end do end do - $:GPU_PARALLEL_LOOP(collapse=4,private=["flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1120,7 +1120,7 @@ contains end do end do else ! Cartesian Coordinates - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1136,8 +1136,8 @@ contains end if if (model_eqns == 3) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["inv_ds","advected_qty_val", & - & "pressure_val","flux_face1","flux_face2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, & + & pressure_val,flux_face1,flux_face2]') do k = 0, p do q = 0, n do l = 0, m @@ -1181,8 +1181,8 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p ! z_extent do l_idx = 0, n ! y_extent @@ -1200,9 +1200,9 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) @@ -1214,10 +1214,10 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds",& - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", & - "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,& + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, & + & local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) @@ -1230,8 +1230,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) @@ -1248,8 +1248,8 @@ contains case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p ! z_extent do k_idx = 0, n ! y_extent @@ -1267,10 +1267,10 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", & - "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, & + & local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) @@ -1286,10 +1286,10 @@ contains end if end do; end do; end do - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", & - "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, & + & local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) @@ -1306,8 +1306,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) @@ -1329,8 +1329,8 @@ contains end if if (use_standard_riemann) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p ! z_extent do q_idx = 0, n ! y_extent @@ -1348,10 +1348,10 @@ contains else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", & - "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, & + & local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) @@ -1363,10 +1363,10 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:GPU_PARALLEL_LOOP(collapse=3, private=["local_inv_ds", & - "local_q_cons_val", "local_k_term_val", & - "local_term_coeff", "local_flux1", & - "local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, & + & local_q_cons_val, local_k_term_val, & + & local_term_coeff, local_flux1, & + & local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) @@ -1379,8 +1379,8 @@ contains end do; end do; end do end if else ! NOT alt_soundspeed - $:GPU_PARALLEL_LOOP(collapse=4,private=["local_inv_ds", & - & "local_term_coeff","local_flux1","local_flux2"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, & + & local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) @@ -1729,7 +1729,7 @@ contains end if - $:GPU_UPDATE(device=["is1","is2","is3","iv"]) + $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) @@ -1787,13 +1787,13 @@ contains @:DEALLOCATE(q_cons_qp%vf(j)%sf) @:DEALLOCATE(q_prim_qp%vf(j)%sf) else - $:GPU_EXIT_DATA(detach=["q_prim_qp%vf(j)%sf"]) + $:GPU_EXIT_DATA(detach='[q_prim_qp%vf(j)%sf]') nullify (q_prim_qp%vf(j)%sf) end if end do do j = adv_idx%beg, adv_idx%end - $:GPU_EXIT_DATA(detach=["q_prim_qp%vf(j)%sf"]) + $:GPU_EXIT_DATA(detach='[q_prim_qp%vf(j)%sf]') nullify (q_prim_qp%vf(j)%sf) end do @@ -1826,7 +1826,7 @@ contains end if if (mpp_lim .and. bubbles_euler) then - $:GPU_EXIT_DATA(delete=["alf_sum%sf"]) + $:GPU_EXIT_DATA(delete='[alf_sum%sf]') deallocate (alf_sum%sf) end if diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index a55b0c144c..e1eb2ed76d 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -65,7 +65,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf - $:GPU_DECLARE(create=["flux_rsx_vf","flux_src_rsx_vf","flux_rsy_vf","flux_src_rsy_vf","flux_rsz_vf","flux_src_rsz_vf"]) + $:GPU_DECLARE(create='[flux_rsx_vf,flux_src_rsx_vf,flux_rsy_vf,flux_src_rsy_vf,flux_rsz_vf,flux_src_rsz_vf]') !> @} !> The cell-boundary values of the geometrical source flux that are computed @@ -76,7 +76,7 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsx_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsy_vf !< real(wp), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsz_vf !< - $:GPU_DECLARE(create=["flux_gsrc_rsx_vf","flux_gsrc_rsy_vf","flux_gsrc_rsz_vf"]) + $:GPU_DECLARE(create='[flux_gsrc_rsx_vf,flux_gsrc_rsy_vf,flux_gsrc_rsz_vf]') !> @} ! The cell-boundary values of the velocity. vel_src_rs_vf is determined as @@ -85,17 +85,17 @@ module m_riemann_solvers real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: vel_src_rsz_vf - $:GPU_DECLARE(create=["vel_src_rsx_vf","vel_src_rsy_vf","vel_src_rsz_vf"]) + $:GPU_DECLARE(create='[vel_src_rsx_vf,vel_src_rsy_vf,vel_src_rsz_vf]') real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: mom_sp_rsz_vf - $:GPU_DECLARE(create=["mom_sp_rsx_vf","mom_sp_rsy_vf","mom_sp_rsz_vf"]) + $:GPU_DECLARE(create='[mom_sp_rsx_vf,mom_sp_rsy_vf,mom_sp_rsz_vf]') real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsx_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsy_vf real(wp), allocatable, dimension(:, :, :, :) :: Re_avg_rsz_vf - $:GPU_DECLARE(create=["Re_avg_rsx_vf","Re_avg_rsy_vf","Re_avg_rsz_vf"]) + $:GPU_DECLARE(create='[Re_avg_rsx_vf,Re_avg_rsy_vf,Re_avg_rsz_vf]') !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ @@ -103,13 +103,13 @@ module m_riemann_solvers type(int_bounds_info) :: isx, isy, isz !> @} - $:GPU_DECLARE(create=["is1","is2","is3","isx","isy","isz"]) + $:GPU_DECLARE(create='[is1,is2,is3,isx,isy,isz]') real(wp), allocatable, dimension(:) :: Gs - $:GPU_DECLARE(create=["Gs"]) + $:GPU_DECLARE(create='[Gs]') real(wp), allocatable, dimension(:, :) :: Res - $:GPU_DECLARE(create=["Res"]) + $:GPU_DECLARE(create='[Res]') contains @@ -354,14 +354,14 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", "alpha_rho_R", & - "vel_L", "vel_R", "alpha_L", "alpha_R", "tau_e_L", "tau_e_R", & - "G_L", "G_R", "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & - "s_L", "s_R", "s_S", "Ys_L", "Ys_R", "xi_field_L", "xi_field_R", & - "Cp_iL", "Cp_iR", "Xs_L", "Xs_R", "Gamma_iL", "Gamma_iR", & - "Yi_avg", "Phi_avg", "h_iL", "h_iR", "h_avg_2", "c_fast", & - "pres_mag", "B", "Ga", "vdotB", "B2", "b4", "cm", "pcorr", & - "zcoef", "vel_L_tmp", "vel_R_tmp"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, & + & vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, & + & G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, & + & s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, & + & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, & + & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & + & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, & + & zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1283,14 +1283,14 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - $:GPU_PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & - "vel_K_Star", "Re_L", "Re_R", "rho_avg", "h_avg", & - "gamma_avg", "s_L", "s_R", "s_S", "vel_avg_rms", & - "alpha_L", "alpha_R", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & - "Gamma_iL", "Gamma_iR", "Cp_iL", "Cp_iR", "Yi_avg", & - "Phi_avg", "h_iL", "h_iR", "h_avg_2", "tau_e_L", & - "tau_e_R", "G_L", "G_R", "flux_ene_e", "xi_field_L", & - "xi_field_R", "pcorr", "zcoef", "vel_L_tmp", "vel_R_tmp"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, & + & vel_K_Star, Re_L, Re_R, rho_avg, h_avg, & + & gamma_avg, s_L, s_R, s_S, vel_avg_rms, & + & alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, & + & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, & + & Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, & + & tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, & + & xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1716,10 +1716,10 @@ contains elseif (model_eqns == 4) then !ME4 - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & - "alpha_rho_R", "vel_L", "vel_R", "alpha_L", "alpha_R", & - "rho_avg", "h_avg", "gamma_avg", "s_L", "s_R", "s_S", & - "vel_avg_rms", "nbub_L", "nbub_R", "ptilde_L", "ptilde_R"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, & + & alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & + & rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, & + & vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1969,12 +1969,12 @@ contains !$acc end parallel loop elseif (model_eqns == 2 .and. bubbles_euler) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["R0_L", "R0_R", "V0_L", & - "V0_R", "P0_L", "P0_R", "pbw_L", "pbw_R", "vel_L", & - "vel_R", "rho_avg", "alpha_L", "alpha_R", "h_avg", & - "gamma_avg", "s_L", "s_R", "s_S", "nbub_L", "nbub_R", & - "ptilde_L", "ptilde_R", "vel_avg_rms", "Re_L", "Re_R", & - "pcorr", "zcoef", "vel_L_tmp", "vel_R_tmp"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, & + & V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, & + & vel_R, rho_avg, alpha_L, alpha_R, h_avg, & + & gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, & + & ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, & + & pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2440,15 +2440,15 @@ contains !$acc end parallel loop else ! 5-EQUATION MODEL WITH HLLC - $:GPU_PARALLEL_LOOP(collapse=3, private=["vel_L", "vel_R", & - "Re_L", "Re_R", "rho_avg", "h_avg", "gamma_avg", & - "alpha_L", "alpha_R", "s_L", "s_R", "s_S", & - "vel_avg_rms", "pcorr", "zcoef", "vel_L_tmp", & - "vel_R_tmp", "Ys_L", "Ys_R", "Xs_L", "Xs_R", & - "Gamma_iL", "Gamma_iR", "Cp_iL", "Cp_iR", "tau_e_L", & - "tau_e_R", "xi_field_L", "xi_field_R", "Yi_avg", & - "Phi_avg", "h_iL", "h_iR", "h_avg_2"], & - copyin=["is1", "is2", "is3"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, & + & Re_L, Re_R, rho_avg, h_avg, gamma_avg, & + & alpha_L, alpha_R, s_L, s_R, s_S, & + & vel_avg_rms, pcorr, zcoef, vel_L_tmp, & + & vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, & + & Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, & + & tau_e_R, xi_field_L, xi_field_R, Yi_avg, & + & Phi_avg, h_iL, h_iR, h_avg_2]', & + & copyin='[is1, is2, is3]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3065,12 +3065,12 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_rho_L", & - "alpha_rho_R", "vel", "alpha_L", "alpha_R", "rho", "pres", & - "E", "H_no_mag", "gamma", "pi_inf", "qv", "vel_rms", "B", & - "c", "c_fast", "pres_mag", "U_L", "U_R", "U_starL", & - "U_starR", "U_doubleL", "U_doubleR", "F_L", "F_R", & - "F_starL", "F_starR", "F_hlld"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, & + & alpha_rho_R, vel, alpha_L, alpha_R, rho, pres, & + & E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, & + & c, c_fast, pres_mag, U_L, U_R, U_starL, & + & U_starR, U_doubleL, U_doubleR, F_L, F_R, & + & F_starL, F_starR, F_hlld]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3322,7 +3322,7 @@ contains do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do - $:GPU_UPDATE(device=["Gs"]) + $:GPU_UPDATE(device='[Gs]') if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) @@ -3334,10 +3334,10 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device=["Res","Re_idx","Re_size"]) + $:GPU_UPDATE(device='[Res,Re_idx,Re_size]') end if - $:GPU_ENTER_DATA(copyin=["is1","is2","is3","isx","isy","isz"]) + $:GPU_ENTER_DATA(copyin='[is1,is2,is3,isx,isy,isz]') is1%beg = -1; is2%beg = 0; is3%beg = 0 is1%end = m; is2%end = n; is3%end = p @@ -3480,7 +3480,7 @@ contains dir_idx = (/3, 1, 2/); dir_flg = (/0._wp, 0._wp, 1._wp/) end if - $:GPU_UPDATE(device=["is1","is2","is3"]) + $:GPU_UPDATE(device='[is1,is2,is3]') if (elasticity) then if (norm_dir == 1) then @@ -3494,9 +3494,9 @@ contains isx = ix; isy = iy; isz = iz ! for stuff in the same module - $:GPU_UPDATE(device=["isx","isy","isz"]) + $:GPU_UPDATE(device='[isx,isy,isz]') ! for stuff in different modules - $:GPU_UPDATE(device=["dir_idx","dir_flg","dir_idx_tau"]) + $:GPU_UPDATE(device='[dir_idx,dir_flg,dir_idx_tau]') ! Population of Buffers in x-direction if (norm_dir == 1) then @@ -3984,10 +3984,10 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - $:GPU_PARALLEL_LOOP(collapse=3, private=["idx_rp", "avg_v_int", & - "avg_dvdx_int", "avg_dvdy_int", "avg_dvdz_int", "Re_s", "Re_b", & - "vel_src_int", "r_eff", "divergence_cyl", "stress_vector_shear", & - "stress_normal_bulk", "div_v_term_const"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, & + & avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, & + & vel_src_int, r_eff, divergence_cyl, stress_vector_shear, & + & stress_normal_bulk, div_v_term_const]') do l = iz%beg, iz%end do k = iy%beg, iy%end do j = ix%beg, ix%end @@ -4147,9 +4147,9 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - $:GPU_PARALLEL_LOOP(collapse=3, private=["idx_right_phys", "vel_grad_avg", & - "current_tau_shear", "current_tau_bulk", "vel_src_at_interface", & - "Re_shear", "Re_bulk", "divergence_v", "i_dim", "vel_comp_idx"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, & + & current_tau_shear, current_tau_bulk, vel_src_at_interface, & + & Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') do l_loop = isz%beg, isz%end do k_loop = isy%beg, isy%end do j_loop = isx%beg, isx%end diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 9be0c31c2b..1fed473b65 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -276,17 +276,17 @@ contains bc_type(1, -1)%sf(:, :, :) = bc_x%beg bc_type(1, 1)%sf(:, :, :) = bc_x%end - $:GPU_UPDATE(device=["bc_type(1,-1)%sf","bc_type(1,1)%sf"]) + $:GPU_UPDATE(device='[bc_type(1,-1)%sf,bc_type(1,1)%sf]') if (n > 0) then bc_type(2, -1)%sf(:, :, :) = bc_y%beg bc_type(2, 1)%sf(:, :, :) = bc_y%end - $:GPU_UPDATE(device=["bc_type(2,-1)%sf","bc_type(2,1)%sf"]) + $:GPU_UPDATE(device='[bc_type(2,-1)%sf,bc_type(2,1)%sf]') if (p > 0) then bc_type(3, -1)%sf(:, :, :) = bc_z%beg bc_type(3, 1)%sf(:, :, :) = bc_z%end - $:GPU_UPDATE(device=["bc_type(3,-1)%sf","bc_type(3,1)%sf"]) + $:GPU_UPDATE(device='[bc_type(3,-1)%sf,bc_type(3,1)%sf]') end if end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index a89d882b82..5bf26ad81f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1263,12 +1263,12 @@ contains if (cfl_dt) then if ((mytime + dt) >= t_stop) then dt = t_stop - mytime - $:GPU_UPDATE(device=["dt"]) + $:GPU_UPDATE(device='[dt]') end if else if ((mytime + dt) >= finaltime) then dt = finaltime - mytime - $:GPU_UPDATE(device=["dt"]) + $:GPU_UPDATE(device='[dt]') end if end if @@ -1292,7 +1292,7 @@ contains if (probe_wrt) then do i = 1, sys_size - $:GPU_UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(host='[q_cons_ts(1)%vf(i)%sf]') end do end if @@ -1395,7 +1395,7 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") do i = 1, sys_size - $:GPU_UPDATE(host=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(host='[q_cons_ts(1)%vf(i)%sf]') do l = 0, p do k = 0, n do j = 0, m @@ -1409,8 +1409,8 @@ contains end do if (qbmm .and. .not. polytropic) then - $:GPU_UPDATE(host=["pb_ts(1)%sf"]) - $:GPU_UPDATE(host=["mv_ts(1)%sf"]) + $:GPU_UPDATE(host='[pb_ts(1)%sf]') + $:GPU_UPDATE(host='[mv_ts(1)%sf]') end if if (cfl_dt) then @@ -1420,16 +1420,16 @@ contains end if if (bubbles_lagrange) then - $:GPU_UPDATE(host=["intfc_rad"]) + $:GPU_UPDATE(host='[intfc_rad]') do i = 1, nBubs if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") end if end do - $:GPU_UPDATE(host=["q_beta%vf(1)%sf"]) + $:GPU_UPDATE(host='[q_beta%vf(1)%sf]') call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1)) - $:GPU_UPDATE(host=["Rmax_stats","Rmin_stats","gas_p","gas_mv","intfc_vel"]) + $:GPU_UPDATE(host='[Rmax_stats,Rmin_stats,gas_p,gas_mv,intfc_vel]') call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else @@ -1626,44 +1626,44 @@ contains integer :: i !Update GPU DATA do i = 1, sys_size - $:GPU_UPDATE(device=["q_cons_ts(1)%vf(i)%sf"]) + $:GPU_UPDATE(device='[q_cons_ts(1)%vf(i)%sf]') end do if (qbmm .and. .not. polytropic) then - $:GPU_UPDATE(device=["pb_ts(1)%sf","mv_ts(1)%sf"]) + $:GPU_UPDATE(device='[pb_ts(1)%sf,mv_ts(1)%sf]') end if if (chemistry) then - $:GPU_UPDATE(device=["q_T_sf%sf"]) + $:GPU_UPDATE(device='[q_T_sf%sf]') end if - $:GPU_UPDATE(device=["nb","R0ref","Ca","Web","Re_inv","weight","R0","V0", & - & "bubbles_euler","polytropic","polydisperse","qbmm","R0_type", & - & "ptil","bubble_model","thermal","poly_sigma","adv_n","adap_dt", & - & "adap_dt_tol","n_idx","pi_fac","low_Mach"]) - $:GPU_UPDATE(device=["R_n","R_v","phi_vn","phi_nv","Pe_c","Tw","pv","M_n", & - & "M_v","k_n","k_v","pb0","mass_n0","mass_v0","Pe_T","Re_trans_T", & - & "Re_trans_c","Im_trans_T","Im_trans_c","omegaN","mul0","ss", & - & "gamma_v","mu_v","gamma_m","gamma_n","mu_n","gam"]) + $:GPU_UPDATE(device='[nb,R0ref,Ca,Web,Re_inv,weight,R0,V0, & + & bubbles_euler,polytropic,polydisperse,qbmm,R0_type, & + & ptil,bubble_model,thermal,poly_sigma,adv_n,adap_dt, & + & adap_dt_tol,n_idx,pi_fac,low_Mach]') + $:GPU_UPDATE(device='[R_n,R_v,phi_vn,phi_nv,Pe_c,Tw,pv,M_n, & + & M_v,k_n,k_v,pb0,mass_n0,mass_v0,Pe_T,Re_trans_T, & + & Re_trans_c,Im_trans_T,Im_trans_c,omegaN,mul0,ss, & + & gamma_v,mu_v,gamma_m,gamma_n,mu_n,gam]') - $:GPU_UPDATE(device=["acoustic_source", "num_source"]) - $:GPU_UPDATE(device=["sigma", "surface_tension"]) + $:GPU_UPDATE(device='[acoustic_source, num_source]') + $:GPU_UPDATE(device='[sigma, surface_tension]') - $:GPU_UPDATE(device=["dx","dy","dz","x_cb","x_cc","y_cb","y_cc","z_cb","z_cc"]) + $:GPU_UPDATE(device='[dx,dy,dz,x_cb,x_cc,y_cb,y_cc,z_cb,z_cc]') - $:GPU_UPDATE(device=["bc_x%vb1","bc_x%vb2","bc_x%vb3","bc_x%ve1","bc_x%ve2","bc_x%ve3"]) - $:GPU_UPDATE(device=["bc_y%vb1","bc_y%vb2","bc_y%vb3","bc_y%ve1","bc_y%ve2","bc_y%ve3"]) - $:GPU_UPDATE(device=["bc_z%vb1","bc_z%vb2","bc_z%vb3","bc_z%ve1","bc_z%ve2","bc_z%ve3"]) + $:GPU_UPDATE(device='[bc_x%vb1,bc_x%vb2,bc_x%vb3,bc_x%ve1,bc_x%ve2,bc_x%ve3]') + $:GPU_UPDATE(device='[bc_y%vb1,bc_y%vb2,bc_y%vb3,bc_y%ve1,bc_y%ve2,bc_y%ve3]') + $:GPU_UPDATE(device='[bc_z%vb1,bc_z%vb2,bc_z%vb3,bc_z%ve1,bc_z%ve2,bc_z%ve3]') - $:GPU_UPDATE(device=["bc_x%grcbc_in","bc_x%grcbc_out","bc_x%grcbc_vel_out"]) - $:GPU_UPDATE(device=["bc_y%grcbc_in","bc_y%grcbc_out","bc_y%grcbc_vel_out"]) - $:GPU_UPDATE(device=["bc_z%grcbc_in","bc_z%grcbc_out","bc_z%grcbc_vel_out"]) + $:GPU_UPDATE(device='[bc_x%grcbc_in,bc_x%grcbc_out,bc_x%grcbc_vel_out]') + $:GPU_UPDATE(device='[bc_y%grcbc_in,bc_y%grcbc_out,bc_y%grcbc_vel_out]') + $:GPU_UPDATE(device='[bc_z%grcbc_in,bc_z%grcbc_out,bc_z%grcbc_vel_out]') - $:GPU_UPDATE(device=["relax", "relax_model"]) + $:GPU_UPDATE(device='[relax, relax_model]') if (relax) then - $:GPU_UPDATE(device=["palpha_eps", "ptgalpha_eps"]) + $:GPU_UPDATE(device='[palpha_eps, ptgalpha_eps]') end if if (ib) then - $:GPU_UPDATE(device=["ib_markers%sf"]) + $:GPU_UPDATE(device='[ib_markers%sf]') end if end subroutine s_initialize_gpu_vars diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index f6181237ee..fd84e50664 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -29,16 +29,16 @@ module m_surface_tension !> @{ type(scalar_field), allocatable, dimension(:) :: c_divs !> @) - $:GPU_DECLARE(create=["c_divs"]) + $:GPU_DECLARE(create='[c_divs]') !> @name cell boundary reconstructed gradient components and magnitude !> @{ real(wp), allocatable, dimension(:, :, :, :) :: gL_x, gR_x, gL_y, gR_y, gL_z, gR_z !> @} - $:GPU_DECLARE(create=["gL_x","gR_x","gL_y","gR_y","gL_z","gR_z"]) + $:GPU_DECLARE(create='[gL_x,gR_x,gL_y,gR_y,gL_z,gR_z]') type(int_bounds_info) :: is1, is2, is3, iv - $:GPU_DECLARE(create=["is1","is2","is3","iv"]) + $:GPU_DECLARE(create='[is1,is2,is3,iv]') contains @@ -85,9 +85,9 @@ contains integer :: j, k, l, i if (id == 1) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & - "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", & - "normWR", "normW"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & + & w1R, w2R, w3R, w1, w2, w3, normWL, & + & normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -132,9 +132,9 @@ contains elseif (id == 2) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & - "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & - "normW"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & + & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, & + & normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -179,9 +179,9 @@ contains elseif (id == 3) then - $:GPU_PARALLEL_LOOP(collapse=3, private=["Omega", "w1L", "w2L", "w3L", & - "w1R", "w2R", "w3R", "w1", "w2", "w3", "normWL", "normWR", & - "normW"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, & + & w1R, w2R, w3R, w1, w2, w3, normWL, normWR, & + & normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -335,7 +335,7 @@ contains end if - $:GPU_UPDATE(device=["is1","is2","is3","iv"]) + $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 658f5abce8..570552a405 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -77,7 +77,7 @@ module m_time_steppers integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme - $:GPU_DECLARE(create=["q_cons_ts","q_prim_vf","q_T_sf","rhs_vf","q_prim_ts","rhs_mv","rhs_pb","max_dt"]) + $:GPU_DECLARE(create='[q_cons_ts,q_prim_vf,q_T_sf,rhs_vf,q_prim_ts,rhs_mv,rhs_pb,max_dt]') contains @@ -948,7 +948,7 @@ contains if (stage == 3) then if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() if (lag_params%write_bubbles) then - $:GPU_UPDATE(host=["gas_p","gas_mv","intfc_rad","intfc_vel"]) + $:GPU_UPDATE(host='[gas_p,gas_mv,intfc_rad,intfc_vel]') call s_write_lag_particles(mytime) end if call s_write_void_evol(mytime) @@ -981,7 +981,7 @@ contains q_prim_vf, & idwint) - $:GPU_PARALLEL_LOOP(collapse=3, private=["vel", "alpha", "Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -1005,7 +1005,7 @@ contains call s_mpi_allreduce_min(dt_local, dt) end if - $:GPU_UPDATE(device=["dt"]) + $:GPU_UPDATE(device='[dt]') end subroutine s_compute_dt @@ -1050,7 +1050,7 @@ contains integer :: i !< Generic loop iterator do i = 1, sys_size - $:GPU_UPDATE(host=["q_prim_vf(i)%sf"]) + $:GPU_UPDATE(host='[q_prim_vf(i)%sf]') end do if (t_step == t_step_start) then diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index da65b0d0a0..d65c9bb029 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -24,10 +24,10 @@ module m_viscous type(int_bounds_info) :: iv type(int_bounds_info) :: is1_viscous, is2_viscous, is3_viscous - $:GPU_DECLARE(create=["is1_viscous","is2_viscous","is3_viscous","iv"]) + $:GPU_DECLARE(create='[is1_viscous,is2_viscous,is3_viscous,iv]') real(wp), allocatable, dimension(:, :) :: Res_viscous - $:GPU_DECLARE(create=["Res_viscous"]) + $:GPU_DECLARE(create='[Res_viscous]') contains @@ -42,8 +42,8 @@ contains Res_viscous(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - $:GPU_UPDATE(device=["Res_viscous","Re_idx","Re_size"]) - $:GPU_ENTER_DATA(copyin=["is1_viscous","is2_viscous","is3_viscous","iv"]) + $:GPU_UPDATE(device='[Res_viscous,Re_idx,Re_size]') + $:GPU_ENTER_DATA(copyin='[is1_viscous,is2_viscous,is3_viscous,iv]') end subroutine s_initialize_viscous_module @@ -75,7 +75,7 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -89,8 +89,8 @@ contains end do end do if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & - "alpha_rho_visc", "Re_visc", "tau_Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & + & alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -197,8 +197,8 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & - "alpha_rho_visc", "Re_visc", "tau_Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & + & alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -302,8 +302,8 @@ contains if (p == 0) return if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & - "alpha_rho_visc", "Re_visc", "tau_Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & + & alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -411,8 +411,8 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private=["alpha_visc", & - "alpha_rho_visc", "Re_visc", "tau_Re"]) + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, & + & alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -549,7 +549,7 @@ contains iv%beg = mom_idx%beg; iv%end = mom_idx%end - $:GPU_UPDATE(device=["iv"]) + $:GPU_UPDATE(device='[iv]') call s_reconstruct_cell_boundary_values_visc( & q_prim_qp%vf(iv%beg:iv%end), & @@ -587,11 +587,11 @@ contains else ! Compute velocity gradient at cell centers using finite differences iv%beg = mom_idx%beg; iv%end = mom_idx%end - $:GPU_UPDATE(device=["iv"]) + $:GPU_UPDATE(device='[iv]') is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -1319,7 +1319,7 @@ contains is1_viscous = ix; is2_viscous = iy; is3_viscous = iz - $:GPU_UPDATE(device=["is1_viscous","is2_viscous","is3_viscous"]) + $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index d965efa377..ce5d5bf82c 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -42,7 +42,7 @@ module m_weno !> @{ real(wp), allocatable, dimension(:, :, :, :) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z !> @} - $:GPU_DECLARE(create=["v_rs_ws_x","v_rs_ws_y","v_rs_ws_z"]) + $:GPU_DECLARE(create='[v_rs_ws_x,v_rs_ws_y,v_rs_ws_z]') ! WENO Coefficients @@ -59,8 +59,8 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_y real(wp), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_z !> @} - $:GPU_DECLARE(create=["poly_coef_cbL_x","poly_coef_cbL_y","poly_coef_cbL_z"]) - $:GPU_DECLARE(create=["poly_coef_cbR_x","poly_coef_cbR_y","poly_coef_cbR_z"]) + $:GPU_DECLARE(create='[poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z]') + $:GPU_DECLARE(create='[poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z]') !> @name The ideal weights at the left and the right cell-boundaries and at the !! left and the right quadrature points, in x-, y- and z-directions. Note @@ -75,7 +75,7 @@ module m_weno real(wp), target, allocatable, dimension(:, :) :: d_cbR_y real(wp), target, allocatable, dimension(:, :) :: d_cbR_z !> @} - $:GPU_DECLARE(create=["d_cbL_x","d_cbL_y","d_cbL_z","d_cbR_x","d_cbR_y","d_cbR_z"]) + $:GPU_DECLARE(create='[d_cbL_x,d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z]') !> @name Smoothness indicator coefficients in the x-, y-, and z-directions. Note !! that the first array dimension identifies the smoothness indicator, the @@ -86,22 +86,22 @@ module m_weno real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_y real(wp), target, allocatable, dimension(:, :, :) :: beta_coef_z !> @} - $:GPU_DECLARE(create=["beta_coef_x","beta_coef_y","beta_coef_z"]) + $:GPU_DECLARE(create='[beta_coef_x,beta_coef_y,beta_coef_z]') ! END: WENO Coefficients integer :: v_size !< Number of WENO-reconstructed cell-average variables - $:GPU_DECLARE(create=["v_size"]) + $:GPU_DECLARE(create='[v_size]') !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ type(int_bounds_info) :: is1_weno, is2_weno, is3_weno - $:GPU_DECLARE(create=["is1_weno","is2_weno","is3_weno"]) + $:GPU_DECLARE(create='[is1_weno,is2_weno,is3_weno]') ! !> @} real(wp) :: test - $:GPU_DECLARE(create=["test"]) + $:GPU_DECLARE(create='[test]') contains @@ -624,11 +624,11 @@ contains #:endfor if (weno_dir == 1) then - $:GPU_UPDATE(device=["poly_coef_cbL_x","poly_coef_cbR_x","d_cbL_x","d_cbR_x","beta_coef_x"]) + $:GPU_UPDATE(device='[poly_coef_cbL_x,poly_coef_cbR_x,d_cbL_x,d_cbR_x,beta_coef_x]') elseif (weno_dir == 2) then - $:GPU_UPDATE(device=["poly_coef_cbL_y","poly_coef_cbR_y","d_cbL_y","d_cbR_y","beta_coef_y"]) + $:GPU_UPDATE(device='[poly_coef_cbL_y,poly_coef_cbR_y,d_cbL_y,d_cbR_y,beta_coef_y]') else - $:GPU_UPDATE(device=["poly_coef_cbL_z","poly_coef_cbR_z","d_cbL_z","d_cbR_z","beta_coef_z"]) + $:GPU_UPDATE(device='[poly_coef_cbL_z,poly_coef_cbR_z,d_cbL_z,d_cbR_z,beta_coef_z]') end if ! Nullifying WENO coefficients and cell-boundary locations pointers @@ -662,7 +662,7 @@ contains is2_weno = is2_weno_d is3_weno = is3_weno_d - $:GPU_UPDATE(device=["is1_weno","is2_weno","is3_weno"]) + $:GPU_UPDATE(device='[is1_weno,is2_weno,is3_weno]') if (weno_order /= 1) then call s_initialize_weno(v_vf, & @@ -713,7 +713,7 @@ contains elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=4,private=["beta","dvd","poly","omega","alpha","tau"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -792,7 +792,7 @@ contains elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private=["dvd","poly","beta","alpha","omega","tau","delta"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -912,7 +912,7 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private=["poly","beta","alpha","omega","tau","delta","dvd","v"]) + $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1132,7 +1132,7 @@ contains ! as to reshape the inputted data in the coordinate direction of ! the WENO reconstruction v_size = ubound(v_vf, 1) - $:GPU_UPDATE(device=["v_size"]) + $:GPU_UPDATE(device='[v_size]') if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) @@ -1266,7 +1266,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - $:GPU_PARALLEL_LOOP(collapse=4,private=["d"]) + $:GPU_PARALLEL_LOOP(collapse=4,private='[d]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end From ca8f970ee857c35f109ab0d68106b9d07f5d48c9 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 17:35:45 -0400 Subject: [PATCH 50/75] Fixed gpu loop defaults and changed some asserts --- src/common/include/parallel_macros.fpp | 6 +++--- src/common/m_phase_change.fpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index d34b32065f..37fc16c4a7 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -20,8 +20,8 @@ #:elif c == ')' #:set count = count - 1 #:endif - #:if count > 1 - #:stop 'Nested parentheses is not supported. Incorrect clause: {}'.format(clause_str) + #:if c == ',' and count > 1 + #:stop 'Nested parentheses with comma inside is not supported. Incorrect clause: {}'.format(clause_str) #:elif count < 0 #:stop 'Missing parentheses. Incorrect clause: {}'.format(clause_str) #:endif @@ -289,7 +289,7 @@ $:acc_directive #:enddef -#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) +#:def GPU_LOOP(collapse=None, parallelism=['seq'], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) #:set collapse_val = GEN_COLLAPSE_STR(collapse) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 27c25e09e5..60710eae7e 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -100,9 +100,9 @@ contains ! starting equilibrium solver $:GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, & - sk, hk, gk, ek, rhok,pS, pSOV, pSSL, & - TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, & - dynE, rhos, rho, rM, m1, m2, MCT, TvF]') + & sk, hk, gk, ek, rhok,pS, pSOV, pSSL, & + & TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, & + & dynE, rhos, rho, rM, m1, m2, MCT, TvF]') do j = 0, m do k = 0, n do l = 0, p From ba189e99219aaff27d76778e7b58bd462526e0d9 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 17:47:12 -0400 Subject: [PATCH 51/75] Switched parallelism from python list to string list --- src/common/include/parallel_macros.fpp | 15 ++++++----- src/common/m_boundary_common.fpp | 22 ++++++++-------- src/common/m_helper.fpp | 4 +-- src/common/m_helper_basic.fpp | 6 ++--- src/common/m_phase_change.fpp | 12 ++++----- src/common/m_variables_conversion.fpp | 10 ++++---- src/pre_process/m_assign_variables.fpp | 4 +-- src/pre_process/m_patches.fpp | 8 +++--- src/simulation/m_acoustic_src.fpp | 6 ++--- src/simulation/m_bubbles.fpp | 34 ++++++++++++------------- src/simulation/m_bubbles_EL.fpp | 4 +-- src/simulation/m_bubbles_EL_kernels.fpp | 12 ++++----- src/simulation/m_compute_cbc.fpp | 16 ++++++------ src/simulation/m_hyperelastic.fpp | 4 +-- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_qbmm.fpp | 14 +++++----- src/simulation/m_riemann_solvers.fpp | 4 +-- src/simulation/m_sim_helpers.fpp | 6 ++--- 18 files changed, 93 insertions(+), 90 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 37fc16c4a7..6bfc432a0f 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -118,10 +118,13 @@ #:def GEN_PARALLELISM_STR(parallelism) #:if parallelism is not None - $:ASSERT_LIST(parallelism, str) + #:assert isinstance(parallelism, str) + #:assert parallelism[0] == '[' and parallelism[-1] == ']' + #:set parallelism_list = [x.strip() for x in parallelism.strip('[]').split(',')] + $:ASSERT_LIST(parallelism_list, str) #:assert all((element == 'gang' or element == 'worker' or & - & element == 'vector' or element == 'seq') for element in parallelism) - #:set parallelism_val = ' '.join(parallelism) + ' ' + & element == 'vector' or element == 'seq') for element in parallelism_list) + #:set parallelism_val = ' '.join(parallelism_list) + ' ' #:else #:set parallelism_val = '' #:endif @@ -191,7 +194,7 @@ #:enddef -#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism=['gang', 'vector'], & +#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) @@ -235,7 +238,7 @@ $:acc_directive #:enddef -#:def GPU_ROUTINE(function_name=None, parallelism=['seq'], nohost=False, cray_inline=False, extraAccArgs=None) +#:def GPU_ROUTINE(function_name=None, parallelism='[seq]', nohost=False, cray_inline=False, extraAccArgs=None) #:assert isinstance(cray_inline, bool) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) @@ -289,7 +292,7 @@ $:acc_directive #:enddef -#:def GPU_LOOP(collapse=None, parallelism=['seq'], data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) +#:def GPU_LOOP(collapse=None, parallelism='[seq]', data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) #:set collapse_val = GEN_COLLAPSE_STR(collapse) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index eb08f05b39..79e992be22 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -276,7 +276,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc @@ -337,7 +337,7 @@ contains end subroutine s_ghost_cell_extrapolation pure subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc @@ -597,7 +597,7 @@ contains end subroutine s_symmetry pure subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc @@ -736,7 +736,7 @@ contains end subroutine s_periodic pure subroutine s_axis(q_prim_vf, pb, mv, k, l) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: k, l @@ -798,7 +798,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc @@ -892,7 +892,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc @@ -1022,7 +1022,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc @@ -1087,7 +1087,7 @@ contains end subroutine s_dirichlet pure subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1285,7 +1285,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_periodic #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1343,7 +1343,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_reflective #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc @@ -1425,7 +1425,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index 3554d8f2ef..f222e24d50 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -44,7 +44,7 @@ contains !! @param Rtmp is the bubble radii !! @param ntmp is the output number bubble density pure subroutine s_comp_n_from_prim(vftmp, Rtmp, ntmp, weights) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: Rtmp real(wp), intent(out) :: ntmp @@ -58,7 +58,7 @@ contains end subroutine s_comp_n_from_prim pure subroutine s_comp_n_from_cons(vftmp, nRtmp, ntmp, weights) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: vftmp real(wp), dimension(nb), intent(in) :: nRtmp real(wp), intent(out) :: ntmp diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 35300ff3a6..7a624dfbb4 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -26,7 +26,7 @@ contains !! @param tol_input Relative error (default = 1e-10_wp). !! @return Result of the comparison. logical pure elemental function f_approx_equal(a, b, tol_input) result(res) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: a, b real(wp), optional, intent(in) :: tol_input real(wp) :: tol @@ -78,7 +78,7 @@ contains !> Checks if a real(wp) variable is of default value. !! @param var Variable to check. logical pure elemental function f_is_default(var) result(res) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: var res = f_approx_equal(var, dflt_real) @@ -103,7 +103,7 @@ contains !> Checks if a real(wp) variable is an integer. !! @param var Variable to check. logical pure elemental function f_is_integer(var) result(res) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: var res = f_approx_equal(var, real(nint(var), wp)) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 60710eae7e..884fb8bc96 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -291,7 +291,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif ! initializing variables @@ -395,7 +395,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, intent(in) :: j, k, l @@ -520,7 +520,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_correct_partial_densities #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif !> @name variables for the correction of the reacting partial densities @@ -583,7 +583,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_jacobian_matrix #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(2, 2), intent(out) :: InvJac @@ -690,7 +690,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pTg_residue #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, intent(in) :: j, k, l @@ -741,7 +741,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_TSat #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: pSat diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 6c7ce05828..f861738684 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -120,7 +120,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_pressure #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: energy, alf @@ -461,7 +461,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -542,7 +542,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -1607,7 +1607,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_speed_of_sound #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: pres @@ -1677,7 +1677,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: B(3), rho, c diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index b814265a6e..7bc91f1479 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -104,7 +104,7 @@ contains !! @param patch_id_fp Array to track patch ids pure subroutine s_assign_patch_mixture_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: patch_id integer, intent(in) :: j, k, l @@ -277,7 +277,7 @@ contains !! @param patch_id_fp Array to track patch ids impure subroutine s_assign_patch_species_primitive_variables(patch_id, j, k, l, & eta, q_prim_vf, patch_id_fp) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: patch_id integer, intent(in) :: j, k, l diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index 13455a3985..cd043b8f09 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -2378,7 +2378,7 @@ contains end subroutine s_model subroutine s_convert_cylindrical_to_cartesian_coord(cyl_y, cyl_z) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: cyl_y, cyl_z @@ -2389,7 +2389,7 @@ contains pure function f_convert_cyl_to_cart(cyl) result(cart) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') t_vec3, intent(in) :: cyl t_vec3 :: cart @@ -2401,7 +2401,7 @@ contains end function f_convert_cyl_to_cart subroutine s_convert_cylindrical_to_spherical_coord(cyl_x, cyl_y) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(IN) :: cyl_x, cyl_y @@ -2414,7 +2414,7 @@ contains !! @param offset Thickness !! @param a Starting position pure elemental function f_r(myth, offset, a) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: myth, offset, a real(wp) :: b real(wp) :: f_r diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index c604d2bb35..fac80b8032 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -346,7 +346,7 @@ contains !! @param gauss_sigma_time_local sigma in time for Gaussian pulse !! @param source Source term amplitude pure elemental subroutine s_source_temporal(sim_time, c, ai, term_index, frequency_local, gauss_sigma_time_local, source, sum_BB) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: ai, term_index real(wp), intent(in) :: sim_time, c, sum_BB real(wp), intent(in) :: frequency_local, gauss_sigma_time_local @@ -698,7 +698,7 @@ contains !! @param c Speed of sound !! @return frequency_local Converted frequency pure elemental function f_frequency_local(freq_conv_flag, ai, c) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') logical, intent(in) :: freq_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c @@ -717,7 +717,7 @@ contains !! @param ai Acoustic source index !! @return gauss_sigma_time_local Converted Gaussian sigma time pure elemental function f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') logical, intent(in) :: gauss_conv_flag integer, intent(in) :: ai real(wp), intent(in) :: c diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index c7b05ce116..7cf1c80f11 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -41,7 +41,7 @@ contains !! @param f_divu Divergence of velocity !! @param fCson Speed of sound from fP (EL) pure elemental function f_rddot(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, fntait, fBtait, f_bub_adv_src, f_divu, fCson) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu real(wp), intent(in) :: fCson @@ -82,7 +82,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw(fR0, fR, fV, fpb) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw @@ -101,7 +101,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_H(fCpbw, fCpinf, fntait, fBtait) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fntait, fBtait real(wp) :: tmp1, tmp2, tmp3 @@ -121,7 +121,7 @@ contains !! @param fBtait Tait EOS parameter !! @param fH Bubble enthalpy pure elemental function f_cgas(fCpinf, fntait, fBtait, fH) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpinf, fntait, fBtait, fH real(wp) :: tmp @@ -144,7 +144,7 @@ contains !! @param advsrc Advection equation source term !! @param divu Divergence of velocity pure elemental function f_cpinfdot(fRho, fP, falf, fntait, fBtait, advsrc, divu) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fRho, fP, falf, fntait, fBtait, advsrc, divu real(wp) :: c2_liquid @@ -174,7 +174,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fpbdot Time derivative of the internal bubble pressure pure elemental function f_Hdot(fCpbw, fCpinf, fCpinf_dot, fntait, fBtait, fR, fV, fR0, fpbdot) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fCpinf, fCpinf_dot, fntait, fBtait real(wp), intent(in) :: fR, fV, fR0, fpbdot @@ -210,7 +210,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fCpbw Boundary wall pressure pure elemental function f_rddot_RP(fCp, fRho, fR, fV, fCpbw) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCp, fRho, fR, fV, fCpbw real(wp) :: f_rddot_RP @@ -233,7 +233,7 @@ contains !! @param fntait Tait EOS parameter !! @param fBtait Tait EOS parameter pure elemental function f_rddot_G(fCpbw, fR, fV, fH, fHdot, fcgas, fntait, fBtait) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fCpbw, fR, fV, fH, fHdot real(wp), intent(in) :: fcgas, fntait, fBtait @@ -256,7 +256,7 @@ contains !! @param fV Current bubble velocity !! @param fpb Internal bubble pressure pure elemental function f_cpbw_KM(fR0, fR, fV, fpb) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR0, fR, fV, fpb real(wp) :: f_cpbw_KM @@ -283,7 +283,7 @@ contains !! @param fR0 Equilibrium bubble radius !! @param fC Current sound speed pure elemental function f_rddot_KM(fpbdot, fCp, fCpbw, fRho, fR, fV, fR0, fC) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fpbdot, fCp, fCpbw real(wp), intent(in) :: fRho, fR, fV, fR0, fC @@ -317,7 +317,7 @@ contains !! @param pb Internal bubble pressure !! @param iR0 Current bubble size index pure elemental subroutine s_bwproperty(pb, iR0, chi_vw, k_mw, rho_mw) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pb integer, intent(in) :: iR0 real(wp), intent(out) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -347,7 +347,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental subroutine s_vflux(fR, fV, fpb, fmass_v, iR0, vflux, fmass_n, fbeta_c, fR_m, fgamma_m) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fR real(wp), intent(in) :: fV real(wp), intent(in) :: fpb @@ -405,7 +405,7 @@ contains !! @param fR_m Mixture gas constant (EL) !! @param fgamma_m Mixture gamma (EL) pure elemental function f_bpres_dot(fvflux, fR, fV, fpb, fmass_v, iR0, fbeta_t, fR_m, fgamma_m) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: fvflux real(wp), intent(in) :: fR real(wp), intent(in) :: fV @@ -468,7 +468,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_step #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf @@ -602,7 +602,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_initial_substep_h #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -688,7 +688,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_advance_substep #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(OUT) :: err real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf @@ -786,7 +786,7 @@ contains !! @param fdMvdt_tmp Rate of change of the mass of vapor in the bubble pure elemental subroutine s_advance_EL(fR_tmp, fV_tmp, fPb_tmp, fMv_tmp, bub_id, & fmass_n, fbeta_c, fbeta_t, fdPbdt_tmp, advance_EL) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(IN) :: fR_tmp, fV_tmp, fPb_tmp, fMv_tmp real(wp), intent(IN) :: fmass_n, fbeta_c, fbeta_t integer, intent(IN) :: bub_id diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index f556cea019..f573e2879e 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -743,7 +743,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_cson_from_pinf #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(in) :: pinf, rhol, gamma, pi_inf @@ -813,7 +813,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, intent(in) :: bub_id, ptype type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index daac6b8238..c37f71df3b 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -203,7 +203,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_applygaussian #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: center integer, dimension(3), intent(in) :: cellaux @@ -273,7 +273,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_check_celloutside #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, dimension(3), intent(inout) :: cellaux logical, intent(out) :: celloutside @@ -309,7 +309,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, dimension(3), intent(inout) :: cellaux integer, dimension(3), intent(in) :: cell @@ -350,7 +350,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_stddsv #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, dimension(3), intent(in) :: cell real(wp), intent(in) :: volpart @@ -391,7 +391,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_char_vol #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif integer, intent(in) :: cellx, celly, cellz real(wp), intent(out) :: Charvol @@ -416,7 +416,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: s_cell integer, dimension(3), intent(out) :: get_cell diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 5bdbc6715d..58a12cabb9 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -88,7 +88,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_slip_wall_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -106,7 +106,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -136,7 +136,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -153,7 +153,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -177,7 +177,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -199,7 +199,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L @@ -221,7 +221,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(sys_size), intent(inout) :: L L(1:advxe) = 0._wp @@ -233,7 +233,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index ae31b0084b..84170ef0a1 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -220,7 +220,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_neoHookean_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G @@ -259,7 +259,7 @@ contains !! calculate the FFtranspose to obtain the btensor, btensor is nxn tensor !! btensor is symmetric, save the data space pure subroutine s_Mooney_Rivlin_cauchy_solver(btensor, q_prim_vf, G, j, k, l) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(scalar_field), dimension(b_size), intent(inout) :: btensor real(wp), intent(in) :: G diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index c9fc9ca1c4..a156efd94f 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -764,7 +764,7 @@ contains !> Function that uses the interpolation coefficients and the current state !! at the cell centers in order to estimate the state at the image point pure subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), & dimension(sys_size), & intent(IN) :: q_prim_vf !< Primitive Variables diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 93361bf0a5..e9764a5d0f 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -566,7 +566,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff_nonpoly #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -639,7 +639,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_coeff #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: pres, rho, c @@ -864,7 +864,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff @@ -880,7 +880,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_chyqmom #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(nmom), intent(in) :: momin real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY @@ -940,7 +940,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_hyqmom #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif real(wp), dimension(2), intent(inout) :: frho, fup real(wp), dimension(3), intent(in) :: fmom @@ -959,7 +959,7 @@ contains end subroutine s_hyqmom pure function f_quad(abscX, abscY, wght_in, q, r, s) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in real(wp), intent(in) :: q, r, s @@ -975,7 +975,7 @@ contains end function f_quad pure function f_quad2D(abscX, abscY, wght_in, pow) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in real(wp), dimension(3), intent(in) :: pow diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 58578f89cb..448264d38a 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -4243,7 +4243,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_shear_out Calculated shear stress tensor (stress on i-face, j-direction). pure subroutine s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, tau_shear_out) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') implicit none @@ -4277,7 +4277,7 @@ contains !! @param[in] divergence_v Velocity divergence (du/dx + dv/dy + dw/dz). !! @param[out] tau_bulk_out Calculated bulk stress tensor (stress on i-face, i-direction). pure subroutine s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, tau_bulk_out) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') implicit none diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 3e17e8364f..e8efc4be29 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -34,7 +34,7 @@ contains #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_compute_enthalpy #else - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') #endif type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf @@ -99,7 +99,7 @@ contains !! @param vcfl_sf (optional) cell centered viscous cfl number !! @param Rc_sf (optional) cell centered Rc pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf @@ -196,7 +196,7 @@ contains !! @param k y coordinate !! @param l z coordinate pure subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) - $:GPU_ROUTINE(parallelism=['seq']) + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: max_dt From 630163b3ec06b26390d5c250b373bfff11564e60 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 18:23:03 -0400 Subject: [PATCH 52/75] Removed default for GPU atomic and fixed GPU wait --- src/common/include/parallel_macros.fpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 6bfc432a0f..7dc0329235 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -389,7 +389,7 @@ $:acc_directive #:enddef -#:def GPU_ATOMIC(atomic='update', extraAccArgs=None) +#:def GPU_ATOMIC(atomic, extraAccArgs=None) #:assert isinstance(atomic, str) #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') @@ -414,11 +414,11 @@ $:acc_directive #:enddef -#:def GPU_WAIT(host=None, device=None, extraAccArgs=None) +#:def GPU_WAIT(extraAccArgs=None) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = host_val.strip('\n') + device_val.strip('\n') - #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') + #:set clause_val = '' + #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef ! New line at end of file is required for FYPP \ No newline at end of file From c03adf944cdc0ed174e7110d426d5aabbdd99bf1 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 18:58:52 -0400 Subject: [PATCH 53/75] Replaced many more directives --- src/common/m_boundary_common.fpp | 14 +++---- src/common/m_helper_basic.fpp | 2 +- src/simulation/m_acoustic_src.fpp | 4 +- src/simulation/m_bubbles_EL_kernels.fpp | 2 +- src/simulation/m_compute_cbc.fpp | 10 ++--- src/simulation/m_fftw.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 14 +++---- src/simulation/m_pressure_relaxation.fpp | 52 ++++++++++++------------ src/simulation/m_qbmm.fpp | 4 +- src/simulation/m_viscous.fpp | 6 +-- 10 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 79e992be22..d76feef205 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1638,7 +1638,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) + $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]') end do end do close (1) @@ -1654,7 +1654,7 @@ contains do dir = 1, num_dims do loc = -1, 1, 2 read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) + $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]') end do end do close (1) @@ -1704,7 +1704,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) + $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]') end do end do @@ -1714,7 +1714,7 @@ contains call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) + $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]') end do end do @@ -1767,17 +1767,17 @@ contains bc_type(1, -1)%sf(:, :, :) = bc_x%beg bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + $:GPU_UPDATE(device='[bc_type(1,-1)%sf,bc_type(1,1)%sf]') if (n > 0) then bc_type(2, -1)%sf(:, :, :) = bc_y%beg bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + $:GPU_UPDATE(device='[bc_type(2,-1)%sf,bc_type(2,1)%sf]') if (p > 0) then bc_type(3, -1)%sf(:, :, :) = bc_z%beg bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + $:GPU_UPDATE(device='[bc_type(3,-1)%sf,bc_type(3,1)%sf]') end if end if diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index 7a624dfbb4..c21956628b 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -52,7 +52,7 @@ contains !! @param tol_input Relative error (default = 1e-10_wp). !! @return Result of the comparison. logical pure function f_approx_in_array(a, b, tol_input) result(res) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: a real(wp), intent(in) :: b(:) real(wp), optional, intent(in) :: tol_input diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index fac80b8032..cdaf92828a 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -205,7 +205,7 @@ contains call s_mpi_send_random_number(phi_rn, bb_num_freq(ai)) end if - !$acc loop reduction(+:sum_BB) + $:GPU_LOOP(reduction='[[sum_BB]]', reductionOp='[+]') do k = 1, bb_num_freq(ai) ! Acoustic period of the wave at each discrete frequency period_BB = 1._wp/(bb_lowest_freq(ai) + k*bb_bandwidth(ai)) @@ -230,7 +230,7 @@ contains B_tait = 0._wp small_gamma = 0._wp - !$acc loop + $:GPU_LOOP() do q = 1, num_fluids myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index c37f71df3b..433a4fa8d1 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -134,7 +134,7 @@ contains strength_vol = volpart strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - !$acc loop collapse(3) private(cellaux, nodecoord) + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') do i = 1, smearGrid do j = 1, smearGrid do k = 1, smearGridz diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 58a12cabb9..de6af78c45 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -20,7 +20,7 @@ module m_compute_cbc contains !> Base L1 calculation pure function f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) result(L1) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(3), intent(in) :: lambda real(wp), intent(in) :: rho, c, dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds @@ -30,7 +30,7 @@ contains !> Fill density L variables pure subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds @@ -44,7 +44,7 @@ contains !> Fill velocity L variables pure subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_dims), intent(in) :: dvel_ds @@ -57,7 +57,7 @@ contains !> Fill advection L variables pure subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_fluids), intent(in) :: dadv_ds @@ -70,7 +70,7 @@ contains !> Fill chemistry L variables pure subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_species), intent(in) :: dYs_ds diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index b30bdbcf93..a8bc439606 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -161,7 +161,7 @@ contains p_cmplx => data_cmplx_gpu p_fltr_cmplx => data_fltr_cmplx_gpu -!$acc data attach(p_real, p_cmplx, p_fltr_cmplx) +$:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') !$acc host_data use_device(p_real, p_cmplx, p_fltr_cmplx) #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 2cd0f97e2a..e500a00898 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -71,7 +71,7 @@ contains i_halo_size = -1 + gp_layers end if - !$acc update device(i_halo_size) + $:GPU_UPDATE(device='[i_halo_size]') @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) end if #endif @@ -297,7 +297,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = 0, gp_layers - 1 @@ -307,7 +307,7 @@ contains end do end do #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, gp_layers - 1 do j = -gp_layers, m + gp_layers @@ -318,7 +318,7 @@ contains end do end do #:else - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, gp_layers - 1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers @@ -345,7 +345,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = 0, n do j = -gp_layers, -1 @@ -355,7 +355,7 @@ contains end do end do #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = 0, p do k = -gp_layers, -1 do j = -gp_layers, m + gp_layers @@ -367,7 +367,7 @@ contains end do #:else ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=3,private='[r]') do l = -gp_layers, -1 do k = -gp_layers, n + gp_layers do j = -gp_layers, m + gp_layers diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 8aa78ad88a..d3a63ad6f6 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -21,10 +21,10 @@ module m_pressure_relaxation s_finalize_pressure_relaxation_module real(wp), allocatable, dimension(:) :: gamma_min, pres_inf - !$acc declare create(gamma_min, pres_inf) + $:GPU_DECLARE(create='[gamma_min, pres_inf]') real(wp), allocatable, dimension(:, :) :: Res - !$acc declare create(Res) + $:GPU_DECLARE(create='[Res]') contains @@ -39,7 +39,7 @@ contains gamma_min(i) = 1._wp/fluid_pp(i)%gamma + 1._wp pres_inf(i) = fluid_pp(i)%pi_inf/(1._wp + fluid_pp(i)%gamma) end do - !$acc update device(gamma_min, pres_inf) + $:GPU_UPDATE(device='[gamma_min, pres_inf]') if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) @@ -48,7 +48,7 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - !$acc update device(Res, Re_idx, Re_size) + $:GPU_UPDATE(device='[Res, Re_idx, Re_size]') end if end subroutine s_initialize_pressure_relaxation_module @@ -70,7 +70,7 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: j, k, l - !$acc parallel loop collapse(3) gang vector default(present) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -83,7 +83,7 @@ contains !> Process pressure relaxation for a single cell pure subroutine s_relax_cell_pressure(q_cons_vf, j, k, l) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l @@ -103,14 +103,14 @@ contains !> Check if pressure relaxation is needed for this cell pure logical function s_needs_pressure_relaxation(q_cons_vf, j, k, l) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: j, k, l integer :: i s_needs_pressure_relaxation = .true. - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) then s_needs_pressure_relaxation = .false. @@ -121,7 +121,7 @@ contains !> Correct volume fractions to physical bounds pure subroutine s_correct_volume_fractions(q_cons_vf, j, k, l) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l @@ -129,7 +129,7 @@ contains integer :: i sum_alpha = 0._wp - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. & (q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then @@ -142,7 +142,7 @@ contains sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l) end do - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha end do @@ -151,7 +151,7 @@ contains !> Main pressure equilibration using Newton-Raphson pure subroutine s_equilibrate_pressure(q_cons_vf, j, k, l) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l @@ -164,7 +164,7 @@ contains ! Initialize pressures pres_relax = 0._wp - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then pres_K_init(i) = (q_cons_vf(i + intxb - 1)%sf(j, k, l)/ & @@ -180,7 +180,7 @@ contains ! Newton-Raphson iteration f_pres = 1e-9_wp df_pres = 1e9_wp - !$acc loop seq + $:GPU_LOOP() do iter = 0, MAX_ITER - 1 if (abs(f_pres) > TOLERANCE) then pres_relax = pres_relax - f_pres/df_pres @@ -194,7 +194,7 @@ contains ! Newton-Raphson step f_pres = -1._wp df_pres = 0._wp - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ & @@ -210,7 +210,7 @@ contains end do ! Update volume fractions - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) & q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rho_K_s(i) @@ -220,7 +220,7 @@ contains !> Correct internal energies using equilibrated pressure pure subroutine s_correct_internal_energies(q_cons_vf, j, k, l) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l @@ -230,7 +230,7 @@ contains real(wp), dimension(2) :: Re integer :: i, q - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids alpha_rho(i) = q_cons_vf(i)%sf(j, k, l) alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l) @@ -243,14 +243,14 @@ contains if (bubbles_euler) then if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) pi_inf = pi_inf + alpha(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids - 1 rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -264,7 +264,7 @@ contains else sum_alpha = 0._wp if (mpp_lim) then - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids alpha_rho(i) = max(0._wp, alpha_rho(i)) alpha(i) = min(max(0._wp, alpha(i)), 1._wp) @@ -273,7 +273,7 @@ contains alpha = alpha/max(sum_alpha, sgm_eps) end if - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -281,11 +281,11 @@ contains end do if (viscous) then - !$acc loop seq + $:GPU_LOOP() do i = 1, 2 Re(i) = dflt_real if (Re_size(i) > 0) Re(i) = 0._wp - !$acc loop seq + $:GPU_LOOP() do q = 1, Re_size(i) Re(i) = alpha(Re_idx(i, q))/Res(i, q) + Re(i) end do @@ -296,7 +296,7 @@ contains ! Compute dynamic pressure and update internal energies dyn_pres = 0._wp - !$acc loop seq + $:GPU_LOOP() do i = momxb, momxe dyn_pres = dyn_pres + 5e-1_wp*q_cons_vf(i)%sf(j, k, l)* & q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps) @@ -304,7 +304,7 @@ contains pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma - !$acc loop seq + $:GPU_LOOP() do i = 1, num_fluids q_cons_vf(i + intxb - 1)%sf(j, k, l) = & q_cons_vf(i + advxb - 1)%sf(j, k, l)*(gammas(i)*pres_relax + pi_infs(i)) diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index e9764a5d0f..1311afc5d6 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -538,13 +538,13 @@ contains ! The following block is not repeated and is left as is if (idir == 1) then - !$acc parallel loop collapse(3) gang vector default(present) + $:GPU_PARALLEL_LOOP(collapse=3) do l = 0, p do q = 0, n do i = 0, m rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) j = bubxb - !$acc loop seq + $:GPU_LOOP() do k = 1, nb rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index cecfb80d34..16420ab3de 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -999,7 +999,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]') if (n > 0) then if (p > 0) then @@ -1096,7 +1096,7 @@ contains end if - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]') if (n > 0) then if (p > 0) then @@ -1199,7 +1199,7 @@ contains is3_viscous = iz iv = iv_in - !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv) + $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]') ! First-Order Spatial Derivatives in x-direction if (norm_dir == 1) then From c07345015bb75ed1a709cd75ffc388ff97f6b7c6 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 18:59:34 -0400 Subject: [PATCH 54/75] Add host_data directive --- src/common/include/parallel_macros.fpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 7dc0329235..b8fe35a0e2 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -193,6 +193,11 @@ $:device_val #:enddef +#:def GEN_USE_DEVICE_STR(use_device) + #:set use_device_val = GEN_PARENTHESES_CLAUSE('use_device', use_device) + $:use_device_val +#:enddef + #:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & @@ -351,6 +356,15 @@ $:acc_directive #:enddef +#:def GPU_HOST_DATA(use_device=None, extraAccArgs=None) + #:set use_device_val = GEN_USE_DEVICE_STR(use_device) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + + #:set clause_val = use_device_val.strip('\n') + #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') From c32c956f39f8547133de51e651d89c70c744cf0d Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 19:03:33 -0400 Subject: [PATCH 55/75] Replaced atomic directives --- src/simulation/m_bubbles_EL_kernels.fpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 433a4fa8d1..3d2f1089ac 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -74,19 +74,19 @@ contains !Update void fraction field addFun1 = strength_vol/Vol - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 !Update time derivative of void fraction addFun2 = strength_vel/Vol - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 !Product of two smeared functions !Update void fraction * time derivative of void fraction if (lag_params%cluster_type >= 4) then addFun3 = (strength_vol*strength_vel)/Vol - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 end if end do @@ -170,14 +170,14 @@ contains !Update void fraction field addFun1 = func*strength_vol - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + addFun1 !Update time derivative of void fraction addFun2 = func*strength_vel - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & + addFun2 @@ -186,7 +186,7 @@ contains !Update void fraction * time derivative of void fraction if (lag_params%cluster_type >= 4) then addFun3 = func2*strength_vol*strength_vel - !$acc atomic update + $:GPU_ATOMIC(atomic='update') updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & + addFun3 From eb01272ff295a9aa474a065fc209f0e3ef67781a Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 19:06:08 -0400 Subject: [PATCH 56/75] Replaced host data directives --- src/simulation/m_fftw.fpp | 8 ++++---- src/simulation/m_weno.fpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index a8bc439606..f37196b82e 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -162,7 +162,7 @@ contains p_fltr_cmplx => data_fltr_cmplx_gpu $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') -!$acc host_data use_device(p_real, p_cmplx, p_fltr_cmplx) +$:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -182,7 +182,7 @@ $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') end do end do -!$acc host_data use_device(p_real, p_fltr_cmplx) +$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else @@ -221,7 +221,7 @@ $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') end do end do -!$acc host_data use_device(p_real, p_cmplx) +$:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -242,7 +242,7 @@ $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') end do end do -!$acc host_data use_device(p_real, p_fltr_cmplx) +$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index f049e913f9..1c88dcbaf2 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1155,7 +1155,7 @@ contains block use CuTensorEx - !$acc host_data use_device(v_rs_ws_x, v_rs_ws_y) + $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) !$acc end host_data end block @@ -1163,7 +1163,7 @@ contains block use CuTensorEx - !$acc host_data use_device(v_rs_ws_x, v_rs_ws_y) + $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) !$acc end host_data end block @@ -1194,7 +1194,7 @@ contains block use CuTensorEx - !$acc host_data use_device(v_rs_ws_x, v_rs_ws_z) + $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) !$acc end host_data end block From 4159b74aebb5f76aaebafca9164185c8d04ee294 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 19:49:56 -0400 Subject: [PATCH 57/75] Removed all end directives excpet host_data and kernels --- src/common/m_variables_conversion.fpp | 1 - src/simulation/m_data_output.fpp | 1 - src/simulation/m_hyperelastic.fpp | 1 - src/simulation/m_hypoelastic.fpp | 6 ------ src/simulation/m_mhd.fpp | 1 - src/simulation/m_rhs.fpp | 3 --- src/simulation/m_riemann_solvers.fpp | 6 ------ src/simulation/m_surface_tension.fpp | 3 --- src/simulation/m_weno.fpp | 13 ------------- 9 files changed, 35 deletions(-) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index f861738684..067ae58142 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1145,7 +1145,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_convert_conservative_to_primitive_variables diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 6d11a68ade..6046113cd5 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -297,7 +297,6 @@ contains end do end do end do - !$acc end parallel loop ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 84170ef0a1..806765c981 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -208,7 +208,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_hyperelastic_rmt_stress_update !> The following subroutine handles the calculation of the btensor. diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 3e42ecf364..03a15d5471 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -112,7 +112,6 @@ contains end do end do end do - !$acc end parallel loop $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p @@ -127,7 +126,6 @@ contains end do end do end do - !$acc end parallel loop if (ndirs > 1) then $:GPU_PARALLEL_LOOP(collapse=3) @@ -138,7 +136,6 @@ contains end do end do end do - !$acc end parallel loop $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p @@ -156,7 +153,6 @@ contains end do end do end do - !$acc end parallel loop ! 3D if (ndirs == 3) then @@ -170,7 +166,6 @@ contains end do end do end do - !$acc end parallel loop $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p @@ -192,7 +187,6 @@ contains end do end do end do - !$acc end parallel loop end if end if diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index b17ec69271..e6ee464c22 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -129,7 +129,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index fe90fb7248..8e7ef5f628 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -1742,7 +1742,6 @@ contains end do end do end do - !$acc end parallel loop else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1755,7 +1754,6 @@ contains end do end do end do - !$acc end parallel loop else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1768,7 +1766,6 @@ contains end do end do end do - !$acc end parallel loop end if end subroutine s_reconstruct_cell_boundary_values_first_order diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 448264d38a..eec1e9fafd 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -1968,7 +1968,6 @@ contains end do end do end do - !$acc end parallel loop elseif (model_eqns == 2 .and. bubbles_euler) then $:GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, & @@ -2439,7 +2438,6 @@ contains end do end do end do - !$acc end parallel loop else ! 5-EQUATION MODEL WITH HLLC $:GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, & @@ -2954,7 +2952,6 @@ contains end do end do end do - !$acc end parallel loop end if end if #:endfor @@ -3301,7 +3298,6 @@ contains end do end do end do - !$acc end parallel loop end if #:endfor @@ -4095,7 +4091,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_compute_cylindrical_viscous_source_flux @@ -4232,7 +4227,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_compute_cartesian_viscous_source_flux diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index fd84e50664..30b6fcec2b 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -349,7 +349,6 @@ contains end do end do end do - !$acc end parallel loop else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -362,7 +361,6 @@ contains end do end do end do - !$acc end parallel loop else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -375,7 +373,6 @@ contains end do end do end do - !$acc end parallel loop end if end subroutine s_reconstruct_cell_boundary_values_capillary diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 1c88dcbaf2..2dd5823668 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -679,7 +679,6 @@ contains end do end do end do - !$acc end parallel loop else if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -692,7 +691,6 @@ contains end do end do end do - !$acc end parallel loop else if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -705,7 +703,6 @@ contains end do end do end do - !$acc end parallel loop end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] @@ -783,7 +780,6 @@ contains end do end do end do - !$acc end parallel loop end if #:endfor elseif (weno_order == 5) then @@ -898,7 +894,6 @@ contains end do end do end do - !$acc end parallel loop if (mp_weno) then call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & @@ -1094,7 +1089,6 @@ contains end do end do end do - !$acc end parallel loop end if #:endfor @@ -1142,7 +1136,6 @@ contains end do end do end do - !$acc end parallel loop end if ! Reshaping/Projecting onto Characteristic Fields in y-direction @@ -1157,7 +1150,6 @@ contains $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) - !$acc end host_data end block else block @@ -1165,7 +1157,6 @@ contains $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) - !$acc end host_data end block end if else @@ -1180,7 +1171,6 @@ contains end do end do end do -!$acc end parallel loop #if MFC_cuTENSOR end if #endif @@ -1196,7 +1186,6 @@ contains $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) - !$acc end host_data end block else #endif @@ -1210,7 +1199,6 @@ contains end do end do end do -!$acc end parallel loop #if MFC_cuTENSOR end if #endif @@ -1388,7 +1376,6 @@ contains end do end do end do - !$acc end parallel loop end subroutine s_preserve_monotonicity From 3c64b6b6b7730711c7fcd5ebb889dac42a3c3660 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 20:04:30 -0400 Subject: [PATCH 58/75] Replaced end host data directives --- src/simulation/m_fftw.fpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index f37196b82e..e66b670677 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -169,7 +169,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif - !$acc end host_data + $:GPU_END_HOST_DATA() Nfq = 3 $:GPU_UPDATE(device='[Nfq]') @@ -189,7 +189,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif - !$acc end host_data + $:GPU_END_HOST_DATA() $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size @@ -228,7 +228,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif - !$acc end host_data + $:GPU_END_HOST_DATA() Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) $:GPU_UPDATE(device='[Nfq]') @@ -249,7 +249,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif - !$acc end host_data + $:GPU_END_HOST_DATA() $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size From f74dd1d0686defe338c4182a255113966f900818 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 20:05:21 -0400 Subject: [PATCH 59/75] Added end host data directive --- src/common/include/parallel_macros.fpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index b8fe35a0e2..2f93b7a8c0 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -365,6 +365,11 @@ $:acc_directive #:enddef +#:def GPU_END_HOST_DATA() + #:set acc_directive = '!$acc end host_data' + $:acc_directive +#:enddef + #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') From 26a3b6f449447cd0c2d19c0ce659a868fc0907ad Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 20:40:41 -0400 Subject: [PATCH 60/75] Updated allocate and deallocate macros --- src/common/include/macros.fpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 531653f370..759ee24175 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -14,14 +14,16 @@ #:def ALLOCATE(*args) @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) - allocate (${', '.join(args)}$) - !$acc enter data create(${', '.join(args)}$) + #:set allocated_variables = ', '.join(args) + allocate (${allocated_variables}$) + $:GPU_ENTER_DATA(create=('[' + allocated_variables + ']')) #:enddef ALLOCATE #:def DEALLOCATE(*args) @:LOG({'@:DEALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'}) - !$acc exit data delete(${', '.join(args)}$) - deallocate (${', '.join(args)}$) + #:set allocated_variables = ', '.join(args) + $:GPU_EXIT_DATA(delete=('[' + allocated_variables + ']')) + deallocate (${allocated_variables}$) #:enddef DEALLOCATE #:def ACC_SETUP_VFs(*args) @@ -32,13 +34,13 @@ @:LOG({'@:ACC_SETUP_VFs(${', '.join(args)}$)'}) #:for arg in args - !$acc enter data copyin(${arg}$) - !$acc enter data copyin(${arg}$%vf) + $:GPU_ENTER_DATA(copyin=('[' + arg + ']')) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf]')) if (allocated(${arg}$%vf)) then do macros_setup_vfs_i = lbound(${arg}$%vf, 1), ubound(${arg}$%vf, 1) if (associated(${arg}$%vf(macros_setup_vfs_i)%sf)) then - !$acc enter data copyin(${arg}$%vf(macros_setup_vfs_i)) - !$acc enter data create(${arg}$%vf(macros_setup_vfs_i)%sf) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)]')) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%vf(macros_setup_vfs_i)%sf]')) end if end do end if @@ -54,9 +56,9 @@ @:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'}) #:for arg in args - !$acc enter data copyin(${arg}$) + $:GPU_ENTER_DATA(copyin=('[' + arg + ']')) if (associated(${arg}$%sf)) then - !$acc enter data create(${arg}$%sf) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%sf]')) end if #:endfor end block @@ -70,18 +72,18 @@ @:LOG({'@:ACC_SETUP_source_spatials(${', '.join(args)}$)'}) #:for arg in args - !$acc enter data copyin(${arg}$) + $:GPU_ENTER_DATA(copyin=('[' + arg + ']')) if (allocated(${arg}$%coord)) then - !$acc enter data create(${arg}$%coord) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%coord]')) end if if (allocated(${arg}$%val)) then - !$acc enter data create(${arg}$%val) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%val]')) end if if (allocated(${arg}$%angle)) then - !$acc enter data create(${arg}$%angle) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%angle]')) end if if (allocated(${arg}$%xyz_to_r_ratios)) then - !$acc enter data create(${arg}$%xyz_to_r_ratios) + $:GPU_ENTER_DATA(copyin=('[' + arg + '%xyz_to_r_ratios]')) end if #:endfor end block From 96bfabe44ab6d48cdd56bc2c2a7e4df746f5ffcf Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 18 Jun 2025 21:05:04 -0400 Subject: [PATCH 61/75] Ran formatter --- src/simulation/m_fftw.fpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index e66b670677..8c64193e28 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -161,8 +161,8 @@ contains p_cmplx => data_cmplx_gpu p_fltr_cmplx => data_fltr_cmplx_gpu -$:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') -$:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') + $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') + $:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -182,7 +182,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') end do end do -$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else @@ -221,7 +221,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') end do end do -$:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') + $:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else @@ -242,7 +242,7 @@ $:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') end do end do -$:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else From 62cac03ff731c812370b35987220e37f7b8f1a30 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 19 Jun 2025 17:56:48 -0400 Subject: [PATCH 62/75] Updated parallelism defaults, Updated cray inlining and used replaced cray inlines --- src/common/include/parallel_macros.fpp | 10 +- src/common/m_boundary_common.fpp | 50 ++--- src/common/m_chemistry.fpp | 10 +- src/common/m_phase_change.fpp | 60 ++--- src/common/m_variables_conversion.fpp | 109 ++++----- src/simulation/include/inline_riemann.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 10 +- src/simulation/m_bubbles.fpp | 24 +- src/simulation/m_bubbles_EE.fpp | 20 +- src/simulation/m_bubbles_EL.fpp | 30 ++- src/simulation/m_bubbles_EL_kernels.fpp | 48 ++-- src/simulation/m_cbc.fpp | 64 +++--- src/simulation/m_compute_cbc.fpp | 64 ++---- src/simulation/m_hyperelastic.fpp | 16 +- src/simulation/m_hypoelastic.fpp | 6 +- src/simulation/m_ibm.fpp | 24 +- src/simulation/m_mhd.fpp | 6 +- src/simulation/m_pressure_relaxation.fpp | 32 +-- src/simulation/m_qbmm.fpp | 64 +++--- src/simulation/m_rhs.fpp | 20 +- src/simulation/m_riemann_solvers.fpp | 262 +++++++++++----------- src/simulation/m_sim_helpers.fpp | 13 +- src/simulation/m_viscous.fpp | 104 ++++----- src/simulation/m_weno.fpp | 4 +- 24 files changed, 474 insertions(+), 580 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 2f93b7a8c0..87868d1285 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -1,4 +1,3 @@ - #:def ASSERT_LIST(data, datatype) #:assert data is not None #:assert isinstance(data, list) @@ -243,7 +242,7 @@ $:acc_directive #:enddef -#:def GPU_ROUTINE(function_name=None, parallelism='[seq]', nohost=False, cray_inline=False, extraAccArgs=None) +#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None) #:assert isinstance(cray_inline, bool) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) @@ -264,8 +263,11 @@ #:stop "When inlining for Cray Compiler, function name must be given and given as a string" #:endif #:set cray_directive = ('!DIR$ INLINEALWAYS ' + function_name).strip('\n') - #:set cray_directive = '#ifdef _CRAYFTN\n' + cray_directive + '\n#else\n' + acc_directive + '\n#endif' +#ifdef _CRAYFTN $:cray_directive +#else + $:acc_directive +#endif #:else $:acc_directive #:endif @@ -297,7 +299,7 @@ $:acc_directive #:enddef -#:def GPU_LOOP(collapse=None, parallelism='[seq]', data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) +#:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) #:set collapse_val = GEN_COLLAPSE_STR(collapse) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index d76feef205..2d92f371e1 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -273,11 +273,8 @@ contains end subroutine s_populate_variables_buffers pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_ghost_cell_extrapolation -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', & + & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -795,11 +792,8 @@ contains end subroutine s_axis pure subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_slip_wall -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', & + & cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -889,11 +883,9 @@ contains end subroutine s_slip_wall pure subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_no_slip_wall -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', & + & cray_inline=True) + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1019,11 +1011,8 @@ contains end subroutine s_no_slip_wall pure subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_dirichlet -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', & + & cray_inline=True) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1282,11 +1271,8 @@ contains end subroutine s_populate_capillary_buffers pure subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_color_function_periodic -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_color_function_periodic', & + & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1340,11 +1326,8 @@ contains end subroutine s_color_function_periodic pure subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_color_function_reflective -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_color_function_reflective', & + & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1422,11 +1405,8 @@ contains end subroutine s_color_function_reflective pure subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_color_function_ghost_cell_extrapolation -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', & + & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index b1cac0a70b..71aa890e87 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -35,7 +35,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = & q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) @@ -46,7 +46,7 @@ contains ! cons. contxb = \rho (1-fluid model) ! cons. momxb + i = \rho u_i energy = q_cons_vf(E_idx)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do eqn = momxb, momxe energy = energy - & 0.5_wp*(q_cons_vf(eqn)%sf(x, y, z)/q_cons_vf(contxb)%sf(x, y, z))**2._wp @@ -72,7 +72,7 @@ contains do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_vf(i)%sf(x, y, z) end do @@ -104,7 +104,7 @@ contains do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) end do @@ -114,7 +114,7 @@ contains call get_net_production_rates(rho, T, Ys, omega) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 884fb8bc96..431f6c9ec9 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -108,7 +108,7 @@ contains do l = 0, p rho = 0.0_wp; TvF = 0.0_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! Mixture density @@ -134,7 +134,7 @@ contains ! kinetic energy as an auxiliary variable to the calculation of the total internal energy dynE = 0.0_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, momxe dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho @@ -256,7 +256,7 @@ contains ! calculating volume fractions, internal energies, and total entropy rhos = 0.0_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! volume fractions @@ -287,12 +287,8 @@ contains !! @param rhoe mixture energy !! @param TS equilibrium temperature at the interface pure subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, q_cons_vf, rhoe, TS) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_infinite_pt_relaxation_k -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_infinite_pt_relaxation_k', & + & parallelism='[seq]', cray_inline=True) ! initializing variables integer, intent(in) :: j, k, l, MFL @@ -308,7 +304,7 @@ contains ! auxiliary variables for the pT-equilibrium solver mCP = 0.0_wp; mQ = 0.0_wp; p_infpT = ps_inf; ! Performing tests before initializing the pT-equilibrium - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system @@ -357,7 +353,7 @@ contains ! updating functions used in the Newton's solver gpp = 0.0_wp; gp = 0.0_wp; hp = 0.0_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids gp = gp + (gs_min(i) - 1.0_wp)*q_cons_vf(i + contxb - 1)%sf(j, k, l)*cvs(i) & @@ -391,12 +387,8 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param TS equilibrium temperature at the interface pure subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_infinite_ptg_relaxation_k', & + & parallelism='[seq]', cray_inline=True) integer, intent(in) :: j, k, l real(wp), intent(inout) :: pS @@ -451,7 +443,7 @@ contains mCP = 0.0_wp; mCPD = 0.0_wp; mCVGP = 0.0_wp; mCVGP2 = 0.0_wp; mQ = 0.0_wp; mQD = 0.0_wp ! Those must be updated through the iterations, as they either depend on ! the partial masses for all fluids, or on the equilibrium pressure - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! sum of the total alpha*rho*cp of the system @@ -516,12 +508,8 @@ contains !! @param k generic loop iterator for y direction !! @param l generic loop iterator for z direction pure subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_correct_partial_densities -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_correct_partial_densities', & + & parallelism='[seq]', cray_inline=True) !> @name variables for the correction of the reacting partial densities !> @{ @@ -579,12 +567,8 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param TJac Transpose of the Jacobian Matrix pure subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_jacobian_matrix -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_jacobian_matrix', & + & parallelism='[seq]', cray_inline=True) real(wp), dimension(2, 2), intent(out) :: InvJac integer, intent(in) :: j @@ -686,12 +670,8 @@ contains !! @param rhoe mixture energy !! @param R2D (2D) residue array pure subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_pTg_residue -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_pTg_residue', & + & parallelism='[seq]', cray_inline=True) integer, intent(in) :: j, k, l real(wp), intent(in) :: mCPD, mCVGP, mQD @@ -737,12 +717,8 @@ contains !! @param TSat Saturation Temperature !! @param TSIn equilibrium Temperature pure elemental subroutine s_TSat(pSat, TSat, TSIn) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_TSat -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_TSat',parallelism='[seq]', & + & cray_inline=True) real(wp), intent(in) :: pSat real(wp), intent(out) :: TSat diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 067ae58142..08034d3106 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -116,12 +116,8 @@ contains !! @param stress Shear Stress !! @param mom Momentum subroutine s_compute_pressure(energy, alf, dyn_p, pi_inf, gamma, rho, qv, rhoYks, pres, T, stress, mom, G, pres_mag) - -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_pressure -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_pressure',parallelism='[seq]', & + & cray_inline=True) real(wp), intent(in) :: energy, alf real(wp), intent(in) :: dyn_p @@ -458,11 +454,9 @@ contains gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K, & G_K, G) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_acc', & + & parallelism='[seq]', cray_inline=True) + real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -539,11 +533,8 @@ contains pure subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, & gamma_K, pi_inf_K, qv_K, & alpha_K, alpha_rho_K, Re_K) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_bubbles_acc', & + & parallelism='[seq]', cray_inline=True) real(wp), intent(inout) :: rho_K, gamma_K, pi_inf_K, qv_K @@ -748,7 +739,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -781,7 +772,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb mu = qK_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc sig = (qK_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)/nbub_sc - mu**2)**0.5_wp @@ -877,7 +868,7 @@ contains do j = ibounds(1)%beg, ibounds(1)%end dyn_pres_K = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -921,13 +912,13 @@ contains B2 = B(1)**2 + B(2)**2 + B(3)**2 m2 = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, momxe m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 end do S = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) end do @@ -935,14 +926,14 @@ contains E = qK_cons_vf(E_idx)%sf(j, k, l) D = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe D = D + qK_cons_vf(i)%sf(j, k, l) end do ! Newton-Raphson W = E + D - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS @@ -968,13 +959,13 @@ contains qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Recover the other primitive variables - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) end do qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -984,22 +975,22 @@ contains if (chemistry) then rho_K = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = rho_K end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) end do else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1009,7 +1000,7 @@ contains rho_K = max(rho_K, sgm_eps) #endif - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, momxe if (model_eqns /= 4) then qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & @@ -1023,7 +1014,7 @@ contains end do if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do @@ -1053,7 +1044,7 @@ contains end if if (bubbles_euler) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb nRtmp(i) = qK_cons_vf(bubrs(i))%sf(j, k, l) end do @@ -1065,7 +1056,7 @@ contains nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) !Convert cons to prim - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1082,7 +1073,7 @@ contains call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = bubxb, bubxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do @@ -1090,21 +1081,21 @@ contains end if if (mhd) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = B_idx%beg, B_idx%end qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do end if if (elasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = strxb, strxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = strxb, strxe ! subtracting elastic contribution for pressure calculation if (G_K > verysmall) then @@ -1121,13 +1112,13 @@ contains end if if (hyperelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = xibeg, xiend qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K end do end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do @@ -1475,22 +1466,22 @@ contains do k = is2b, is2e do j = is1b, is1e - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe alpha_rho_K(i) = qK_prim_vf(j, k, l, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_K(i) = qK_prim_vf(j, k, l, contxe + i) end do vel_K_sum = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_K_sum = vel_K_sum + vel_K(i)**2._wp end do @@ -1511,7 +1502,7 @@ contains ! Computing the energy from the pressure if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do @@ -1528,12 +1519,12 @@ contains end if ! mass flux, this should be \alpha_i \rho_i u_i - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels FK_vf(j, k, l, contxe + dir_idx(i)) = & rho_K*vel_K(dir_idx(1)) & @@ -1546,14 +1537,14 @@ contains ! Species advection Flux, \rho*u*Y if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do end if if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe FK_vf(j, k, l, i) = 0._wp FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) @@ -1561,12 +1552,12 @@ contains else ! Could be bubbles_euler! - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) end do @@ -1603,11 +1594,9 @@ contains #ifndef MFC_PRE_PROCESS pure subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c_c, c) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_speed_of_sound -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', & + & parallelism='[seq]', cray_inline=True) + real(wp), intent(in) :: pres real(wp), intent(in) :: rho, gamma, pi_inf @@ -1640,7 +1629,7 @@ contains c = (1._wp/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2))) elseif (model_eqns == 3) then c = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids c = c + adv(q)*(1._wp/gammas(q) + 1._wp)* & (pres + pi_infs(q)/(gammas(q) + 1._wp)) @@ -1673,11 +1662,9 @@ contains #ifndef MFC_PRE_PROCESS pure subroutine s_compute_fast_magnetosonic_speed(rho, c, B, norm, c_fast, h) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_fast_magnetosonic_speed -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_fast_magnetosonic_speed', & + & parallelism='[seq]', cray_inline=True) + real(wp), intent(in) :: B(3), rho, c real(wp), intent(in) :: h ! only used for relativity diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index b0ed99390b..047184e4e7 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -1,7 +1,7 @@ #:def arithmetic_avg() rho_avg = 5e-1_wp*(rho_L + rho_R) vel_avg_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -17,7 +17,7 @@ vel_avg_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_avg_rms = vel_avg_rms + (sqrt(rho_L)*vel_L(i) + sqrt(rho_R)*vel_R(i))**2._wp/ & (sqrt(rho_L) + sqrt(rho_R))**2._wp diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index cdaf92828a..cd22c96e22 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -230,7 +230,7 @@ contains B_tait = 0._wp small_gamma = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) @@ -238,7 +238,7 @@ contains if (bubbles_euler) then if (num_fluids > 2) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids - 1 myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -252,7 +252,7 @@ contains end if if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) @@ -323,11 +323,11 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = contxb, contxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = momxb, momxe rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) end do diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 7cf1c80f11..3a9b017d1c 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -465,11 +465,9 @@ contains fntait, fBtait, f_bub_adv_src, f_divu, & bub_id, fmass_v, fmass_n, fbeta_c, & fbeta_t, fCson, adap_dt_stop) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_advance_step -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(inout) :: fR, fV, fpb, fmass_v real(wp), intent(in) :: fRho, fP, fR0, fpbdot, alf real(wp), intent(in) :: fntait, fBtait, f_bub_adv_src, f_divu @@ -599,11 +597,9 @@ contains pure subroutine s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & fCson, h) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_initial_substep_h -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_initial_substep_h',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu real(wp), intent(IN) :: fCson @@ -685,11 +681,9 @@ contains bub_id, fmass_v, fmass_n, fbeta_c, & fbeta_t, fCson, h, & myR_tmp, myV_tmp, myPb_tmp, myMv_tmp) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_advance_substep -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_advance_substep',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(OUT) :: err real(wp), intent(IN) :: fRho, fP, fR, fV, fR0, fpb, fpbdot, alf real(wp), intent(IN) :: fntait, fBtait, f_bub_adv_src, f_divu, h diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index c7d85c90f0..fd6a8e14c3 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -81,7 +81,7 @@ contains do k = 0, n do j = 0, m nR3bar = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp end do @@ -178,7 +178,7 @@ contains do j = 0, m bub_adv_src(j, k, l) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb bub_r_src(j, k, l, q) = 0._wp bub_v_src(j, k, l, q) = 0._wp @@ -200,7 +200,7 @@ contains if (adv_n) then nbub = q_prim_vf(n_idx)%sf(j, k, l) else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) @@ -208,7 +208,7 @@ contains R3 = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb R3 = R3 + weight(q)*Rtmp(q)**3._wp end do @@ -219,7 +219,7 @@ contains if (.not. adap_dt) then R2Vav = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) end do @@ -227,10 +227,10 @@ contains bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) @@ -241,14 +241,14 @@ contains B_tait = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) B_tait = B_tait + myalpha(ii)*pi_infs(ii) end do else if (num_fluids > 2) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids - 1 myRho = myRho + myalpha_rho(ii) n_tait = n_tait + myalpha(ii)*gammas(ii) @@ -331,7 +331,7 @@ contains rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do k = 1, nb rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index f573e2879e..20e7515c83 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -575,7 +575,7 @@ contains call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) ! Obtain liquid density and computing speed of sound from pinf - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) @@ -740,11 +740,9 @@ contains !! @param pi_inf Liquid stiffness !! @param cson Calculated speed of sound pure subroutine s_compute_cson_from_pinf(q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_cson_from_pinf -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_cson_from_pinf', & + & parallelism='[seq]', cray_inline=True) + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(in) :: pinf, rhol, gamma, pi_inf integer, dimension(3), intent(in) :: cell @@ -754,7 +752,7 @@ contains real(wp), dimension(num_dims) :: vel integer :: i - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) end do @@ -810,11 +808,9 @@ contains !! @param cell Bubble cell !! @param Romega Control volume radius pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_get_pinf -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', & + & cray_inline=True) + integer, intent(in) :: bub_id, ptype type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp), intent(out) :: f_pinfl @@ -835,7 +831,7 @@ contains !< Find current bubble cell cell(:) = int(scoord(:)) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 end do @@ -926,11 +922,11 @@ contains charpres2 = 0._wp vol = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, smearGrid - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 1, smearGrid - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do k = 1, smearGridz cellaux(1) = cell(1) + i - (mapCells + 1) cellaux(2) = cell(2) + j - (mapCells + 1) @@ -1652,7 +1648,7 @@ contains integer :: i - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = bub_id, nBubs - 1 lag_id(i, 1) = lag_id(i + 1, 1) bub_R0(i) = bub_R0(i + 1) diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 3d2f1089ac..48ea3bad9a 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -200,11 +200,9 @@ contains !> The purpose of this subroutine is to apply the gaussian kernel function for each bubble (Maeda and Colonius, 2018)). pure subroutine s_applygaussian(center, cellaux, nodecoord, stddsv, strength_idx, func) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_applygaussian -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_applygaussian',parallelism='[seq]', & + & cray_inline=True) + real(wp), dimension(3), intent(in) :: center integer, dimension(3), intent(in) :: cellaux real(wp), dimension(3), intent(in) :: nodecoord @@ -270,11 +268,9 @@ contains !! @param cellaux Tested cell to smear the bubble effect in. !! @param celloutside If true, then cellaux is outside the computational domain. pure subroutine s_check_celloutside(cellaux, celloutside) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_check_celloutside -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_check_celloutside',parallelism='[seq]', & + & cray_inline=True) + integer, dimension(3), intent(inout) :: cellaux logical, intent(out) :: celloutside @@ -306,11 +302,9 @@ contains !! @param cell Cell of the current bubble !! @param cellaux Cell to map the bubble effect in. pure subroutine s_shift_cell_symmetric_bc(cellaux, cell) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_shift_cell_symmetric_bc -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_shift_cell_symmetric_bc', & + & parallelism='[seq]', cray_inline=True) + integer, dimension(3), intent(inout) :: cellaux integer, dimension(3), intent(in) :: cell @@ -347,11 +341,9 @@ contains !! @param volpart Volume of the bubble !! @param stddsv Standard deviaton pure subroutine s_compute_stddsv(cell, volpart, stddsv) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_stddsv -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_stddsv',parallelism='[seq]', & + & cray_inline=True) + integer, dimension(3), intent(in) :: cell real(wp), intent(in) :: volpart real(wp), intent(out) :: stddsv @@ -388,11 +380,9 @@ contains !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume pure elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_get_char_vol -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & + & cray_inline=True) + integer, intent(in) :: cellx, celly, cellz real(wp), intent(out) :: Charvol @@ -413,11 +403,9 @@ contains !! @param s Computational coordinates of the bubble, real type !! @param get_cell Computational coordinates of the bubble, integer type pure subroutine s_get_cell(s_cell, get_cell) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_get_cell -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_get_cell',parallelism='[seq]', & + & cray_inline=True) + real(wp), dimension(3), intent(in) :: s_cell integer, dimension(3), intent(out) :: get_cell integer :: i diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 1062130a7d..557ad6d1ae 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -779,25 +779,25 @@ contains do k = is2%beg, is2%end ! Transferring the Primitive Variables - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) end do vel_K_sum = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_K_sum = vel_K_sum + vel(i)**2._wp end do pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx adv(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) end do @@ -808,13 +808,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe mf(i) = alpha_rho(i)/rho end do if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do @@ -847,39 +847,39 @@ contains ! First-Order Spatial Derivatives of Primitive Variables - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe dalpha_rho_ds(i) = 0._wp end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims dvel_ds(i) = 0._wp end do dpres_ds = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx dadv_ds(i) = 0._wp end do if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species dYs_ds(i) = 0._wp end do end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 0, buff_size - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dalpha_rho_ds(i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -889,7 +889,7 @@ contains dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & fd_coef_${XYZ}$ (j, cbc_loc) + & dpres_ds - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -897,7 +897,7 @@ contains end do if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & fd_coef_${XYZ}$ (j, cbc_loc) + & @@ -924,7 +924,7 @@ contains call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) ! Add GRCBC for Subsonic Inflow if (bc_${XYZ}$%grcbc_in) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 2, momxb L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -934,7 +934,7 @@ contains L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = E_idx, advxe - 1 L(i) = c*Ma*(adv(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do @@ -974,13 +974,13 @@ contains dpres_dt = -5e-1_wp*(L(advxe) + L(1)) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe dalpha_rho_dt(i) = & -(L(i + 1) - mf(i)*dpres_dt)/(c*c) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & (L(1) - L(advxe))/(2._wp*rho*c) + & @@ -989,13 +989,13 @@ contains end do vel_dv_dt_sum = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) end do if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do @@ -1003,12 +1003,12 @@ contains ! The treatment of void fraction source is unclear if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) !+ adv(i) * vel(dir_idx(1))/y_cc(n) end do else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx dadv_dt(i) = -L(momxe + i) end do @@ -1021,7 +1021,7 @@ contains dgamma_dt = dadv_dt(1) dpi_inf_dt = dadv_dt(2) else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids drho_dt = drho_dt + dalpha_rho_dt(i) dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) @@ -1031,13 +1031,13 @@ contains end if ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*dalpha_rho_dt(i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, momxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + ds(0)*(vel(i - contxe)*drho_dt & @@ -1048,14 +1048,14 @@ contains ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 call get_species_enthalpies_rt(T, h_k) sum_Enthalpies = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) end do flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) @@ -1071,12 +1071,12 @@ contains end if if (riemann_solver == 1) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & @@ -1089,13 +1089,13 @@ contains else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & ds(0)*dadv_dt(i - E_idx) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) end do diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index de6af78c45..b73351b606 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -85,11 +85,9 @@ contains !> Slip wall CBC (Thompson 1990, pg. 451) pure subroutine s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_slip_wall_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_slip_wall_L',parallelism='[seq]', & + & cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c, dpres_ds @@ -103,11 +101,9 @@ contains !> Nonreflecting subsonic buffer CBC (Thompson 1987, pg. 13) pure subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_buffer_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c @@ -133,11 +129,9 @@ contains !> Nonreflecting subsonic inflow CBC (Thompson 1990, pg. 455) pure subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_inflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_inflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c, dpres_ds @@ -150,11 +144,9 @@ contains !> Nonreflecting subsonic outflow CBC (Thompson 1990, pg. 454) pure subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_nonreflecting_subsonic_outflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c @@ -174,11 +166,9 @@ contains !> Force-free subsonic outflow CBC (Thompson 1990, pg. 454) pure subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_force_free_subsonic_outflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c @@ -196,11 +186,9 @@ contains !> Constant pressure subsonic outflow CBC (Thompson 1990, pg. 455) pure subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_constant_pressure_subsonic_outflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c @@ -218,11 +206,9 @@ contains !> Supersonic inflow CBC (Thompson 1990, pg. 453) pure subroutine s_compute_supersonic_inflow_L(L) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(sys_size), intent(inout) :: L L(1:advxe) = 0._wp if (chemistry) L(chemxb:chemxe) = 0._wp @@ -230,11 +216,9 @@ contains !> Supersonic outflow CBC (Thompson 1990, pg. 453) pure subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_supersonic_outflow_L', & + & parallelism='[seq]', cray_inline=True) + real(wp), dimension(3), intent(in) :: lambda real(wp), dimension(sys_size), intent(inout) :: L real(wp), intent(in) :: rho, c diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 806765c981..06610a5977 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -55,7 +55,7 @@ contains @:ACC_SETUP_VFs(btensor) @:ALLOCATE(Gs(1:num_fluids)) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids Gs(i) = fluid_pp(i)%G end do @@ -111,7 +111,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) @@ -124,7 +124,7 @@ contains !if ( G <= verysmall ) G_K = 0_wp if (G > verysmall) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, tensor_size tensora(i) = 0._wp end do @@ -133,7 +133,7 @@ contains ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number ! derivatives in the x-direction tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x(r, j) @@ -167,7 +167,7 @@ contains if (tensorb(tensor_size) > verysmall) then ! STEP 2c: computing the inverse of grad_xi tensor = F ! tensorb is the adjoint, tensora becomes F - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, tensor_size - 1 tensora(i) = tensorb(i)/tensorb(tensor_size) end do @@ -198,7 +198,7 @@ contains q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & G*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma ! STEP 5c: updating the Cauchy stress conservative scalar field - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 q_cons_vf(strxb + i - 1)%sf(j, k, l) = & rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) @@ -238,7 +238,7 @@ contains #:endfor ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) @@ -279,7 +279,7 @@ contains ! dividing by the jacobian for neo-Hookean model ! setting the tensor to the stresses for riemann solver - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 q_prim_vf(strxb + i - 1)%sf(j, k, l) = & G*btensor(i)%sf(j, k, l)/btensor(b_size)%sf(j, k, l) diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 03a15d5471..3f736b0b0b 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -117,7 +117,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number du_dx(k, l, q) = du_dx(k, l, q) & + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_h(r, k) @@ -141,7 +141,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number du_dy(k, l, q) = du_dy(k, l, q) & + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_h(r, l) @@ -171,7 +171,7 @@ contains do q = 0, p do l = 0, n do k = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number du_dz(k, l, q) = du_dz(k, l, q) & + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_h(r, q) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index a156efd94f..67b083584b 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -207,7 +207,7 @@ contains dyn_pres = 0._wp ! Set q_prim_vf params at GP so that mixture vars calculated properly - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -243,7 +243,7 @@ contains end if ! Set momentum - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -251,7 +251,7 @@ contains end do ! Set continuity and adv vars - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -305,7 +305,7 @@ contains end if if (model_eqns == 3) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = intxb, intxe q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + pi_infs(q - intxb + 1)) @@ -333,7 +333,7 @@ contains physical_loc = [x_cc(j), y_cc(k), 0._wp] end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, num_fluids q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) @@ -348,7 +348,7 @@ contains dyn_pres = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = momxb, momxe q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & @@ -817,11 +817,11 @@ contains end if end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = i1, i2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = j1, j2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do k = k1, k2 coeff = gp%interp_coeffs(i - i1 + 1, j - j1 + 1, k - k1 + 1) @@ -829,13 +829,13 @@ contains pres_IP = pres_IP + coeff* & q_prim_vf(E_idx)%sf(i, j, k) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = momxb, momxe vel_IP(q + 1 - momxb) = vel_IP(q + 1 - momxb) + coeff* & q_prim_vf(q)%sf(i, j, k) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do l = contxb, contxe alpha_rho_IP(l) = alpha_rho_IP(l) + coeff* & q_prim_vf(l)%sf(i, j, k) @@ -848,7 +848,7 @@ contains end if if (bubbles_euler .and. .not. qbmm) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do l = 1, nb if (polytropic) then r_IP(l) = r_IP(l) + coeff*q_prim_vf(bubxb + (l - 1)*2)%sf(i, j, k) diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index e6ee464c22..8112b3af7e 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -82,16 +82,16 @@ contains do k = 0, m divB = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) end do if (p > 0) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index d3a63ad6f6..df1d22a83e 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -110,7 +110,7 @@ contains integer :: i s_needs_pressure_relaxation = .true. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > (1._wp - sgm_eps)) then s_needs_pressure_relaxation = .false. @@ -129,7 +129,7 @@ contains integer :: i sum_alpha = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids if ((q_cons_vf(i + contxb - 1)%sf(j, k, l) < 0._wp) .or. & (q_cons_vf(i + advxb - 1)%sf(j, k, l) < 0._wp)) then @@ -142,7 +142,7 @@ contains sum_alpha = sum_alpha + q_cons_vf(i + advxb - 1)%sf(j, k, l) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + advxb - 1)%sf(j, k, l)/sum_alpha end do @@ -164,7 +164,7 @@ contains ! Initialize pressures pres_relax = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then pres_K_init(i) = (q_cons_vf(i + intxb - 1)%sf(j, k, l)/ & @@ -180,7 +180,7 @@ contains ! Newton-Raphson iteration f_pres = 1e-9_wp df_pres = 1e9_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do iter = 0, MAX_ITER - 1 if (abs(f_pres) > TOLERANCE) then pres_relax = pres_relax - f_pres/df_pres @@ -194,7 +194,7 @@ contains ! Newton-Raphson step f_pres = -1._wp df_pres = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) then rho_K_s(i) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/ & @@ -210,7 +210,7 @@ contains end do ! Update volume fractions - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids if (q_cons_vf(i + advxb - 1)%sf(j, k, l) > sgm_eps) & q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rho_K_s(i) @@ -230,7 +230,7 @@ contains real(wp), dimension(2) :: Re integer :: i, q - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho(i) = q_cons_vf(i)%sf(j, k, l) alpha(i) = q_cons_vf(E_idx + i)%sf(j, k, l) @@ -243,14 +243,14 @@ contains if (bubbles_euler) then if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) pi_inf = pi_inf + alpha(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -264,7 +264,7 @@ contains else sum_alpha = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho(i) = max(0._wp, alpha_rho(i)) alpha(i) = min(max(0._wp, alpha(i)), 1._wp) @@ -273,7 +273,7 @@ contains alpha = alpha/max(sum_alpha, sgm_eps) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho = rho + alpha_rho(i) gamma = gamma + alpha(i)*gammas(i) @@ -281,11 +281,11 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re(i) = dflt_real if (Re_size(i) > 0) Re(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re(i) = alpha(Re_idx(i, q))/Res(i, q) + Re(i) end do @@ -296,7 +296,7 @@ contains ! Compute dynamic pressure and update internal energies dyn_pres = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, momxe dyn_pres = dyn_pres + 5e-1_wp*q_cons_vf(i)%sf(j, k, l)* & q_cons_vf(i)%sf(j, k, l)/max(rho, sgm_eps) @@ -304,7 +304,7 @@ contains pres_relax = (q_cons_vf(E_idx)%sf(j, k, l) - dyn_pres - pi_inf)/gamma - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids q_cons_vf(i + intxb - 1)%sf(j, k, l) = & q_cons_vf(i + advxb - 1)%sf(j, k, l)*(gammas(i)*pres_relax + pi_infs(i)) diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 1311afc5d6..40301942b6 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -544,7 +544,7 @@ contains do i = 0, m rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) j = bubxb - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do k = 1, nb rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) @@ -563,11 +563,9 @@ contains !Coefficient array for non-polytropic model (pb and mv values are accounted in wght_pb and wght_mv) pure subroutine s_coeff_nonpoly(pres, rho, c, coeffs) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_coeff_nonpoly -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_coeff_nonpoly',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -636,11 +634,9 @@ contains !Coefficient array for polytropic model (pb for each R0 bin accounted for in wght_pb) pure subroutine s_coeff(pres, rho, c, coeffs) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_coeff -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_coeff',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -740,10 +736,10 @@ contains if (alf > small_alf) then nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb ! Gather moments for this bubble bin - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do r = 2, nmom moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) end do @@ -751,12 +747,12 @@ contains call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) if (polytropic) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 1, nnode wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 1, nnode chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) @@ -775,13 +771,13 @@ contains ! Compute change in moments due to bubble dynamics r = 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i2 = 0, 2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i1 = 0, 2 if ((i1 + i2) <= 2) then momsum = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 1, nterms select case (bubble_model) case (3) @@ -811,7 +807,7 @@ contains ! Compute change in pb and mv for non-polytropic model if (.not. polytropic) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do j = 1, nnode drdt = msum(2) drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) @@ -839,11 +835,11 @@ contains end if end if else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, nb - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i1 = 0, 2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i2 = 0, 2 moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp end do @@ -861,11 +857,9 @@ contains contains ! Helper to select the correct coefficient routine subroutine s_coeff_selector(pres, rho, c, coeff, polytropic) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_chyqmom -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_coeff_selector',parallelism='[seq]', & + & cray_inline=True) + real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff logical, intent(in) :: polytropic @@ -877,11 +871,9 @@ contains end subroutine s_coeff_selector pure subroutine s_chyqmom(momin, wght, abscX, abscY) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_chyqmom -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_chyqmom',parallelism='[seq]', & + & cray_inline=True) + real(wp), dimension(nmom), intent(in) :: momin real(wp), dimension(nnode), intent(inout) :: wght, abscX, abscY @@ -937,11 +929,9 @@ contains end subroutine s_chyqmom pure subroutine s_hyqmom(frho, fup, fmom) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_hyqmom -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_hyqmom',parallelism='[seq]', & + & cray_inline=True) + real(wp), dimension(2), intent(inout) :: frho, fup real(wp), dimension(3), intent(in) :: fmom diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 8e7ef5f628..acdfa769c3 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -626,11 +626,11 @@ contains do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end alf_sum%sf(j, k, l) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe - 1 alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe - 1 q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & /alf_sum%sf(j, k, l) @@ -1429,7 +1429,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & @@ -1478,7 +1478,7 @@ contains $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & @@ -1494,7 +1494,7 @@ contains do l = 0, p do k = 1, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1510,7 +1510,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & @@ -1531,7 +1531,7 @@ contains do l = 0, p do k = 1, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1546,7 +1546,7 @@ contains $:GPU_PARALLEL_LOOP(collapse=2) do l = 0, p do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, 0, l) = & rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & @@ -1561,7 +1561,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) - 5e-1_wp/y_cc(k)* & @@ -1596,7 +1596,7 @@ contains do l = 0, p do k = 0, n do j = 0, m - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx rhs_vf(i)%sf(j, k, l) = & rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index eec1e9fafd..3a46f629d2 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -367,13 +367,13 @@ contains do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -381,13 +381,13 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -431,7 +431,7 @@ contains pres_mag%R = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) @@ -440,7 +440,7 @@ contains alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) @@ -450,7 +450,7 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -464,13 +464,13 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_L(i) = alpha_L(Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -480,13 +480,13 @@ contains end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_R(i) = alpha_R(Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -497,7 +497,7 @@ contains end if if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -594,7 +594,7 @@ contains if (hypoelasticity) then G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) @@ -627,7 +627,7 @@ contains ! G_L = 0._wp ! G_R = 0._wp ! - ! $:GPU_LOOP() + ! $:GPU_LOOP(parallelism='[seq]') ! do i = 1, num_fluids ! G_L = G_L + alpha_L(i)*Gs(i) ! G_R = G_R + alpha_R(i)*Gs(i) @@ -636,17 +636,17 @@ contains ! if ((G_L > 1e-3_wp) .and. (G_R > 1e-3_wp)) then ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:GPU_LOOP() + ! $:GPU_LOOP(parallelism='[seq]') ! do i = 1, b_size-1 ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) ! end do - ! $:GPU_LOOP() + ! $:GPU_LOOP(parallelism='[seq]') ! do i = 1, b_size-1 ! tau_e_L(i) = 0._wp ! tau_e_R(i) = 0._wp ! end do - ! $:GPU_LOOP() + ! $:GPU_LOOP(parallelism='[seq]') ! do i = 1, num_dims ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) @@ -674,7 +674,7 @@ contains end if if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -753,7 +753,7 @@ contains ! Mass if (.not. relativity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*alpha_rho_R(i)*vel_R(norm_dir) & @@ -763,7 +763,7 @@ contains /(s_M - s_P) end do elseif (relativity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & @@ -844,7 +844,7 @@ contains + s_M*s_P*(cm%L(3) - cm%R(3))) & /(s_M - s_P) elseif (bubbles_euler) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -859,7 +859,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do else if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -875,7 +875,7 @@ contains /(s_M - s_P) end do else - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & (s_M*(rho_R*vel_R(dir_idx(1)) & @@ -971,7 +971,7 @@ contains end if ! Advection - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & (qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -996,7 +996,7 @@ contains !end if ! Div(U)? - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & (xi_M*(rho_L*vel_L(dir_idx(i))* & @@ -1017,7 +1017,7 @@ contains end if if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -1067,7 +1067,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1076,7 +1076,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1089,7 +1089,7 @@ contains (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = strxb, strxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1301,7 +1301,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -1326,32 +1326,32 @@ contains alpha_R_sum = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -1368,13 +1368,13 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -1384,13 +1384,13 @@ contains end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -1406,18 +1406,18 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -1434,13 +1434,13 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0._wp; G_R = 0._wp; - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -1451,7 +1451,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -1475,7 +1475,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -1570,7 +1570,7 @@ contains ! COMPUTING FLUXES ! MASS FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & @@ -1579,7 +1579,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & @@ -1595,7 +1595,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0._wp; - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -1613,7 +1613,7 @@ contains end if ! VOLUME FRACTION FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & @@ -1621,7 +1621,7 @@ contains end do ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -1631,7 +1631,7 @@ contains ! INTERNAL ENERGIES ADVECTION FLUX. ! K-th pressure and velocity in preparation for the internal energy flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & @@ -1650,7 +1650,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -1660,7 +1660,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -1681,11 +1681,11 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = intxb, intxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1693,7 +1693,7 @@ contains flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1701,7 +1701,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1726,26 +1726,26 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -1758,7 +1758,7 @@ contains gamma_L = 0._wp pi_inf_L = 0._wp qv_L = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_L = rho_L + alpha_rho_L(i) gamma_L = gamma_L + alpha_L(i)*gammas(i) @@ -1770,7 +1770,7 @@ contains gamma_R = 0._wp pi_inf_R = 0._wp qv_R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_R = rho_R + alpha_rho_R(i) gamma_R = gamma_R + alpha_R(i)*gammas(i) @@ -1845,7 +1845,7 @@ contains xi_M = (5e-1_wp + sign(5e-1_wp, s_S)) xi_P = (5e-1_wp - sign(5e-1_wp, s_S)) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*alpha_rho_L(i) & @@ -1856,7 +1856,7 @@ contains ! Momentum flux. ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -1875,7 +1875,7 @@ contains if (bubbles_euler) then ! Put p_tilde in - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & @@ -1886,7 +1886,7 @@ contains flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = alf_idx, alf_idx !only advect the void fraction flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1896,7 +1896,7 @@ contains end do ! Source for volume fraction advection equation - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp @@ -1907,7 +1907,7 @@ contains ! Add advection flux for bubble variables if (bubbles_euler) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -1922,7 +1922,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -1939,7 +1939,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1947,7 +1947,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -1980,7 +1980,7 @@ contains do k = is2%beg, is2%end do j = is1%beg, is1%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) @@ -1988,7 +1988,7 @@ contains vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2006,7 +2006,7 @@ contains ! Retain this in the refactor if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2014,7 +2014,7 @@ contains qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2034,7 +2034,7 @@ contains qv_R = 0._wp if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2042,7 +2042,7 @@ contains qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do else if (num_fluids > 2) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) @@ -2058,13 +2058,13 @@ contains if (viscous) then if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_L(i) @@ -2074,13 +2074,13 @@ contains end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res(i, q) & + Re_R(i) @@ -2099,7 +2099,7 @@ contains H_R = (E_R + pres_R)/rho_R if (avg_state == 2) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) @@ -2119,7 +2119,7 @@ contains else nbub_L_denom = 0._wp nbub_R_denom = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) @@ -2133,7 +2133,7 @@ contains nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb if (.not. qbmm) then if (polytropic) then @@ -2166,7 +2166,7 @@ contains R3V2Lbar = 0._wp R3V2Rbar = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) @@ -2201,7 +2201,7 @@ contains gamma_avg = 5e-1_wp*(gamma_L + gamma_R) vel_avg_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_avg_rms = vel_avg_rms + (5e-1_wp*(vel_L(i) + vel_R(i)))**2._wp end do @@ -2220,7 +2220,7 @@ contains vel_avg_rms, 0._wp, c_avg) if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2284,7 +2284,7 @@ contains pcorr = 0._wp end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2303,7 +2303,7 @@ contains ! Include p_tilde - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & @@ -2335,7 +2335,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*s_S ! Volume fraction flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2345,7 +2345,7 @@ contains end do ! Source for volume fraction advection equation - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & xi_M*(vel_L(dir_idx(i)) + & @@ -2361,7 +2361,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) ! Add advection flux for bubble variables - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2390,7 +2390,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2407,7 +2407,7 @@ contains (1._wp - dir_flg(dir_idx(1)))* & vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2415,7 +2415,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2455,14 +2455,14 @@ contains !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) @@ -2489,32 +2489,32 @@ contains ! Change this by splitting it into the cases ! present in the bubbles_euler if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) @@ -2528,13 +2528,13 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_L(i) = dflt_real if (Re_size(i) > 0) Re_L(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_L(i) @@ -2544,13 +2544,13 @@ contains end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_R(i) = dflt_real if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res(i, q) & + Re_R(i) @@ -2562,7 +2562,7 @@ contains if (chemistry) then c_sum_Yi_Phi = 0.0_wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2621,19 +2621,19 @@ contains ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) end do G_L = 0._wp G_R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids G_L = G_L + alpha_L(i)*Gs(i) G_R = G_R + alpha_R(i)*Gs(i) end do - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 ! Elastic contribution to energy if G large enough if ((G_L > verysmall) .and. (G_R > verysmall)) then @@ -2650,14 +2650,14 @@ contains ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY if (hyperelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) end do G_L = 0._wp G_R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids ! Mixture left and right shear modulus G_L = G_L + alpha_L(i)*Gs(i) @@ -2668,7 +2668,7 @@ contains E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) @@ -2692,7 +2692,7 @@ contains vel_avg_rms, c_sum_Yi_Phi, c_avg) if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do @@ -2768,7 +2768,7 @@ contains ! COMPUTING THE HLLC FLUXES ! MASS FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, contxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2779,7 +2779,7 @@ contains ! MOMENTUM FLUX. ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & @@ -2814,7 +2814,7 @@ contains ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then flux_ene_e = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) ! MOMENTUM ELASTIC FLUX. @@ -2833,7 +2833,7 @@ contains ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, strxe - strxb + 1 flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & @@ -2842,7 +2842,7 @@ contains end if ! VOLUME FRACTION FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = & xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & @@ -2852,7 +2852,7 @@ contains end do ! VOLUME FRACTION SOURCE FLUX. - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & @@ -2875,7 +2875,7 @@ contains ! REFERENCE MAP FLUX. if (hyperelasticity) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & @@ -2888,7 +2888,7 @@ contains flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) if (chemistry) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) @@ -2903,7 +2903,7 @@ contains #:if (NORM_DIR == 2) if (cyl_coord) then !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, E_idx flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do @@ -2920,7 +2920,7 @@ contains (1._wp - dir_flg(idx1))* & vel_R(idx1)) - vel_R(idx1)))) ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -2928,7 +2928,7 @@ contains #:endif #:if (NORM_DIR == 3) if (grid_geometry == 3) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do @@ -3119,7 +3119,7 @@ contains ! Sum properties of all fluid components rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho%L = rho%L + alpha_rho_L(i) gamma%L = gamma%L + alpha_L(i)*gammas(i) @@ -3289,7 +3289,7 @@ contains ! Energy flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) ! Partial fraction - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = advxb, advxe flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do @@ -3996,7 +3996,7 @@ contains ! Average velocities and their derivatives at the interface ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i_vel = 1, num_dims avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) @@ -4074,7 +4074,7 @@ contains end if end select - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i_vel = 1, num_dims flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index e8efc4be29..b89c3a9c1d 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -31,11 +31,8 @@ contains !! @param k y index !! @param l z index pure subroutine s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_enthalpy -#else - $:GPU_ROUTINE(parallelism='[seq]') -#endif + $:GPU_ROUTINE(function_name='s_compute_enthalpy',parallelism='[seq]', & + & cray_inline=True) type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf real(wp), intent(inout), dimension(num_fluids) :: alpha @@ -49,7 +46,7 @@ contains integer :: i - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho(i) = q_prim_vf(i)%sf(j, k, l) alpha(i) = q_prim_vf(E_idx + i)%sf(j, k, l) @@ -64,13 +61,13 @@ contains call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha, alpha_rho, Re) end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel(i) = q_prim_vf(contxe + i)%sf(j, k, l) end do vel_sum = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels vel_sum = vel_sum + vel(i)**2._wp end do diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 16420ab3de..c1a438c355 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -81,7 +81,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = momxb, E_idx tau_Re_vf(i)%sf(j, k, l) = 0._wp end do @@ -95,7 +95,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -111,14 +111,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -137,7 +137,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -148,7 +148,7 @@ contains end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -156,12 +156,12 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -181,7 +181,7 @@ contains - 2._wp*grad_x_vf(1)%sf(j, k, l) & - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & (3._wp*Re_visc(1)) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -203,7 +203,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -219,14 +219,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -245,7 +245,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -256,7 +256,7 @@ contains end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -264,12 +264,12 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -308,7 +308,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -324,14 +324,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -350,7 +350,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -361,7 +361,7 @@ contains end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -369,12 +369,12 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -394,7 +394,7 @@ contains y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & Re_visc(1) - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 2, 3 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & @@ -417,7 +417,7 @@ contains do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) if (bubbles_euler .and. num_fluids == 1) then @@ -433,14 +433,14 @@ contains pi_inf_visc = 0._wp if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -459,7 +459,7 @@ contains alpha_visc_sum = 0._wp if (mpp_lim) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) @@ -470,7 +470,7 @@ contains end if - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) @@ -478,12 +478,12 @@ contains end do if (viscous) then - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 Re_visc(i) = dflt_real if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + Re_visc(i) @@ -597,7 +597,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j, k, l) - & @@ -612,7 +612,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & (q_prim_qp%vf(i)%sf(j + 1, k, l) - & @@ -629,7 +629,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j, l) - & @@ -644,7 +644,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & (q_prim_qp%vf(i)%sf(k, j + 1, l) - & @@ -659,7 +659,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & @@ -678,7 +678,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & @@ -698,7 +698,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & @@ -718,7 +718,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & @@ -740,7 +740,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -756,7 +756,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & @@ -772,7 +772,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -793,7 +793,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & @@ -814,7 +814,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -835,7 +835,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & @@ -856,7 +856,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -877,7 +877,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & @@ -897,7 +897,7 @@ contains do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & @@ -917,7 +917,7 @@ contains do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & @@ -1214,7 +1214,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(j)) & @@ -1242,7 +1242,7 @@ contains do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(k)) & @@ -1270,7 +1270,7 @@ contains do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = iv%beg, iv%end dv_ds_vf(i)%sf(j, k, l) = & 1._wp/((1._wp + wa_flg)*dL(l)) & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 2dd5823668..fa740669b7 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -789,7 +789,7 @@ contains do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size ! reconstruct from left side @@ -908,7 +908,7 @@ contains do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP() + $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity From 41c8280b1d9dffea49521c40459597d3eb37a7d8 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 19 Jun 2025 18:10:50 -0400 Subject: [PATCH 63/75] Combined end directives with the beginning directive --- src/common/include/parallel_macros.fpp | 28 ++++++++++++++++++-------- src/simulation/m_fftw.fpp | 20 +++++++++--------- src/simulation/m_weno.fpp | 9 ++++++--- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 87868d1285..c79a05e0f3 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -132,6 +132,7 @@ #:def GEN_COLLAPSE_STR(collapse) #:if collapse is not None + #:set collapse = int(collapse) #:assert isinstance(collapse, int) #:assert collapse > 1 #:set collapse_val = 'collapse(' + str(collapse) + ') ' @@ -327,8 +328,13 @@ $:acc_directive #:enddef -#:def GPU_DATA(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) - +#:def GPU_DATA(code, copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, no_create=None, present=None, deviceptr=None, attach=None, default=None, extraAccArgs=None) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code' + #:endif + #:set copy_val = GEN_COPY_STR(copy) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') @@ -355,21 +361,27 @@ & deviceptr_val.strip('\n') + attach_val.strip('\n') + & & default_val.strip('\n') #:set acc_directive = '!$acc data ' + clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end data' $:acc_directive + $:code + $:end_acc_directive #:enddef -#:def GPU_HOST_DATA(use_device=None, extraAccArgs=None) +#:def GPU_HOST_DATA(code, use_device=None, extraAccArgs=None) + #:assert code is not None + #:assert isinstance(code, str) + #:if code == '' or code.isspace() + #:stop 'GPU_HOST_DATA macro has no effect on the code as it is not surrounding any code' + #:endif #:set use_device_val = GEN_USE_DEVICE_STR(use_device) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) #:set clause_val = use_device_val.strip('\n') #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end host_data' $:acc_directive -#:enddef - -#:def GPU_END_HOST_DATA() - #:set acc_directive = '!$acc end host_data' - $:acc_directive + $:code + $:end_acc_directive #:enddef #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 8c64193e28..425e1be060 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -161,15 +161,15 @@ contains p_cmplx => data_cmplx_gpu p_fltr_cmplx => data_fltr_cmplx_gpu - $:GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') - $:GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') + #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif - $:GPU_END_HOST_DATA() + #:endcall GPU_HOST_DATA Nfq = 3 $:GPU_UPDATE(device='[Nfq]') @@ -182,14 +182,14 @@ contains end do end do - $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif - $:GPU_END_HOST_DATA() + #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size @@ -221,14 +221,14 @@ contains end do end do - $:GPU_HOST_DATA(use_device='[p_real, p_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') #if defined(__PGI) ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) call hipCheck(hipDeviceSynchronize()) #endif - $:GPU_END_HOST_DATA() + #:endcall GPU_HOST_DATA Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) $:GPU_UPDATE(device='[Nfq]') @@ -242,14 +242,14 @@ contains end do end do - $:GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) call hipCheck(hipDeviceSynchronize()) #endif - $:GPU_END_HOST_DATA() + #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size @@ -293,7 +293,7 @@ contains end do end do #endif -!$acc end data +#:endcall GPU_DATA end subroutine s_apply_fourier_filter !> The purpose of this subroutine is to destroy the fftw plan diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index fa740669b7..f7eeec4a7a 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1148,15 +1148,17 @@ contains block use CuTensorEx - $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') + #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) + #:endcall GPU_HOST_DATA end block else block use CuTensorEx - $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') + #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) + #:endcall GPU_HOST_DATA end block end if else @@ -1184,8 +1186,9 @@ contains block use CuTensorEx - $:GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') + #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) + #:endcall end block else #endif From bfefe6aa345b8c217702681c8d4fc51c9663ef00 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 19 Jun 2025 18:52:45 -0400 Subject: [PATCH 64/75] Removed whitespace in macros, added mute, and ran formatter --- src/common/include/macros.fpp | 2 +- src/common/include/parallel_macros.fpp | 70 +---------- src/common/m_boundary_common.fpp | 2 +- src/common/m_variables_conversion.fpp | 3 - src/simulation/m_fftw.fpp | 168 ++++++++++++------------- src/simulation/m_qbmm.fpp | 1 - src/simulation/m_weno.fpp | 6 +- 7 files changed, 94 insertions(+), 158 deletions(-) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index 759ee24175..fea730cbd1 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -106,4 +106,4 @@ //${message or '"No error description."'}$) end if #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! New line at end of file is required for FYPP diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index c79a05e0f3..5b0b8bf564 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -1,3 +1,5 @@ +#:mute + #:def ASSERT_LIST(data, datatype) #:assert data is not None #:assert isinstance(data, list) @@ -7,7 +9,6 @@ #:def GEN_PARENTHESES_CLAUSE(clause_name, clause_str) #:set clause_regex = re.compile(',(?![^(]*\\))') - #:assert isinstance(clause_name, str) #:if clause_str is not None #:set count = 0 @@ -203,35 +204,20 @@ & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:set default_val = GEN_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & & copy_val.strip('\n') + copyin_val.strip('\n') + & @@ -246,16 +232,13 @@ #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None) #:assert isinstance(cray_inline, bool) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:assert isinstance(nohost, bool) #:if nohost == True #:set nohost_val = 'nohost' #:else #:set nohost_val = '' #:endif - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = parallelism_val.strip('\n') + nohost_val.strip('\n') #:set acc_directive = '!$acc routine ' + & & clause_val + extraAccArgs_val.strip('\n') @@ -275,23 +258,14 @@ #:enddef #:def GPU_DECLARE(copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, present=None, deviceptr=None, link=None, extraAccArgs=None) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set link_val = GEN_LINK_STR(link) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & & copyout_val.strip('\n') + create_val.strip('\n') + & & present_val.strip('\n') + deviceptr_val.strip('\n') + & @@ -301,11 +275,8 @@ #:enddef #:def GPU_LOOP(collapse=None, parallelism=None, data_dependency=None, reduction=None, reductionOp=None, private=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:if data_dependency is not None #:assert isinstance(data_dependency, str) #:assert (data_dependency == 'auto' or data_dependency == 'independent') @@ -313,13 +284,9 @@ #:else #:set data_dependency_val = '' #:endif - #:set private_val = GEN_PRIVATE_STR(private, False) - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & & data_dependency_val.strip('\n') + private_val.strip('\n') + & & reduction_val.strip('\n') @@ -334,27 +301,16 @@ #:if code == '' or code.isspace() #:stop 'GPU_DATA macro has no effect on the code as it is not surrounding any code' #:endif - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set default_val = GEN_DEFAULT_STR(default) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copy_val.strip('\n') + copyin_val.strip('\n') + & & copyout_val.strip('\n') + create_val.strip('\n') + & & no_create_val.strip('\n') + present_val.strip('\n') + & @@ -375,7 +331,6 @@ #:endif #:set use_device_val = GEN_USE_DEVICE_STR(use_device) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = use_device_val.strip('\n') #:set acc_directive = '!$acc host_data ' + clause_val + extraAccArgs_val.strip('\n') #:set end_acc_directive = '!$acc end host_data' @@ -386,13 +341,9 @@ #:def GPU_ENTER_DATA(copyin=None, copyinReadOnly=None, create=None, attach=None, extraAccArgs=None) #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set create_val = GEN_CREATE_STR(create) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copyin_val.strip('\n') + create_val.strip('\n') + attach_val.strip('\n') #:set acc_directive = '!$acc enter data ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -400,13 +351,9 @@ #:def GPU_EXIT_DATA(copyout=None, delete=None, detach=None, extraAccArgs=None) #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set delete_val = GEN_DELETE_STR(delete) - #:set detach_val = GEN_DETACH_STR(detach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = copyout_val.strip('\n') + delete_val.strip('\n') + detach_val.strip('\n') #:set acc_directive = '!$acc exit data ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -414,9 +361,7 @@ #:def GPU_CACHE(cache, extraAccArgs=None) #:set cache_val = GEN_PARENTHESES_CLAUSE('cache', cache) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = cache_val.strip('\n') #:set acc_directive = '!$acc ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -425,11 +370,8 @@ #:def GPU_ATOMIC(atomic, extraAccArgs=None) #:assert isinstance(atomic, str) #:assert (atomic == 'read' or atomic == 'write' or atomic == 'update' or atomic == 'capture') - #:set atomic_val = atomic - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = atomic_val.strip('\n') #:set acc_directive = '!$acc atomic ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -437,11 +379,8 @@ #:def GPU_UPDATE(host=None, device=None, extraAccArgs=None) #:set host_val = GEN_HOST_STR(host) - #:set device_val = GEN_DEVICE_STR(device) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = host_val.strip('\n') + device_val.strip('\n') #:set acc_directive = '!$acc update ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive @@ -449,9 +388,10 @@ #:def GPU_WAIT(extraAccArgs=None) #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = '' #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef -! New line at end of file is required for FYPP \ No newline at end of file + +#:endmute +! New line at end of file is required for FYPP diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 2d92f371e1..2c48c760f2 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1405,7 +1405,7 @@ contains end subroutine s_color_function_reflective pure subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l) - $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', & + $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', & & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs integer, intent(in) :: bc_dir, bc_loc diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 08034d3106..3dff4c241f 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -457,7 +457,6 @@ contains $:GPU_ROUTINE(function_name='s_convert_species_to_mixture_variables_acc', & & parallelism='[seq]', cray_inline=True) - real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< @@ -1597,7 +1596,6 @@ contains $:GPU_ROUTINE(function_name='s_compute_speed_of_sound', & & parallelism='[seq]', cray_inline=True) - real(wp), intent(in) :: pres real(wp), intent(in) :: rho, gamma, pi_inf real(wp), intent(in) :: H @@ -1665,7 +1663,6 @@ contains $:GPU_ROUTINE(function_name='s_compute_fast_magnetosonic_speed', & & parallelism='[seq]', cray_inline=True) - real(wp), intent(in) :: B(3), rho, c real(wp), intent(in) :: h ! only used for relativity real(wp), intent(out) :: c_fast diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 425e1be060..87f612a76b 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -162,138 +162,138 @@ contains p_fltr_cmplx => data_fltr_cmplx_gpu #:call GPU_DATA(attach='[p_real, p_cmplx, p_fltr_cmplx]') - #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else - ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA - Nfq = 3 - $:GPU_UPDATE(device='[Nfq]') + #:endcall GPU_HOST_DATA + Nfq = 3 + $:GPU_UPDATE(device='[Nfq]') - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do - #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else - ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do - end do - end do - - do i = 1, fourier_rings + #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do end do - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + do i = 1, fourier_rings + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do + end do + end do + + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + end do end do end do - end do - #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_cmplx]') #if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) #else - ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(p_real), c_loc(p_cmplx)) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA + #:endcall GPU_HOST_DATA - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:GPU_UPDATE(device='[Nfq]') + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + $:GPU_UPDATE(device='[Nfq]') - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do - #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') + #:call GPU_HOST_DATA(use_device='[p_real, p_fltr_cmplx]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else - ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(p_fltr_cmplx), c_loc(p_real)) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA + #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do end do end do - end do - end do - -#else - Nfq = 3 - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) end do - end do - ! Apply Fourier filter to additional rings - do i = 1, fourier_rings - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) +#else + Nfq = 3 do j = 0, m do k = 1, sys_size data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) + q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + end do + end do + + ! Apply Fourier filter to additional rings + do i = 1, fourier_rings + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) + end do end do end do - end do #endif -#:endcall GPU_DATA + #:endcall GPU_DATA end subroutine s_apply_fourier_filter !> The purpose of this subroutine is to destroy the fftw plan diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 40301942b6..a104937575 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -637,7 +637,6 @@ contains $:GPU_ROUTINE(function_name='s_coeff',parallelism='[seq]', & & cray_inline=True) - real(wp), intent(in) :: pres, rho, c real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index f7eeec4a7a..73ff699454 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -1149,7 +1149,7 @@ contains use CuTensorEx #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') - v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) + v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1, sys_size], order=[2, 1, 3, 4]) #:endcall GPU_HOST_DATA end block else @@ -1157,7 +1157,7 @@ contains use CuTensorEx #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_y]') - v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) + v_rs_ws_y = reshape(v_rs_ws_x, shape=[n + 1 + 2*buff_size, m + 2*buff_size + 1, p + 1 + 2*buff_size, sys_size], order=[2, 1, 3, 4]) #:endcall GPU_HOST_DATA end block end if @@ -1187,7 +1187,7 @@ contains use CuTensorEx #:call GPU_HOST_DATA(use_device='[v_rs_ws_x, v_rs_ws_z]') - v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) + v_rs_ws_z = reshape(v_rs_ws_x, shape=[p + 1 + 2*buff_size, n + 2*buff_size + 1, m + 2*buff_size + 1, sys_size], order=[3, 2, 1, 4]) #:endcall end block else From 31b078279f8b2ccdf9da01c3c63ed1ea7c41ceb6 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 19 Jun 2025 22:30:01 -0400 Subject: [PATCH 65/75] Had extra variables from a merge --- src/simulation/m_cbc.fpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 557ad6d1ae..55c385c78f 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -50,7 +50,6 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf $:GPU_DECLARE(create='[q_prim_rsx_vf,q_prim_rsy_vf,q_prim_rsz_vf]') - type(scalar_field), allocatable, dimension(:) :: F_rs_vf, F_src_rs_vf !< !! Cell-average fluxes (src - source). These are directly determined from the !! cell-average primitive variables, q_prims_rs_vf, and not a Riemann solver. From 50af22748163829db7d04d2b5ad00a441f38de13 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Thu, 19 Jun 2025 22:36:55 -0400 Subject: [PATCH 66/75] Ran formatter --- src/simulation/m_cbc.fpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 55c385c78f..b92812ae7a 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -50,7 +50,6 @@ module m_cbc real(wp), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf $:GPU_DECLARE(create='[q_prim_rsx_vf,q_prim_rsy_vf,q_prim_rsz_vf]') - !! Cell-average fluxes (src - source). These are directly determined from the !! cell-average primitive variables, q_prims_rs_vf, and not a Riemann solver. From 70c679bb6176a131aa4cfbc6af2ea0ffc7272669 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 23 Jun 2025 20:54:16 -0400 Subject: [PATCH 67/75] Added gpu nacro docs --- docs/documentation/gpuParallelization.md | 328 +++++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 docs/documentation/gpuParallelization.md diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md new file mode 100644 index 0000000000..34602c42ce --- /dev/null +++ b/docs/documentation/gpuParallelization.md @@ -0,0 +1,328 @@ +# Parallelization via GPUs + +MFC compiles GPU code via OpenACC and in the future OpenMP as well. + +In order to swap between OpenACC and OpenMP, ACC and MP directives are implemented via FYPP macros. + +[OpenACC Quick start Guide](https://openacc-best-practices-guide.readthedocs.io/en/latest/01-Introduction.html) +[OpenACC API Documentation](https://www.openacc.org/sites/default/files/inline-files/API%20Guide%202.7.pdf) + +------------------------------------------------------------------------------------------ + +## Macro API Documentation + +Note: Ordering is not guarennteed or stable, so use key-value pairing when using macros + +### Data Type Meanings + +- Integer is a number + +- Boolean is a pythonic boolean - Valid options: `True` or `False` + +- String List is given as a comma separated list surrounding by brackets and inside quotations + - Ex: `'[hello, world, Fortran]'` + +- 2-level string list is given as a comma separated list of string lists surrounding by brackets and inside quotations + - Ex: `'[[hello, world], [Fortran, MFC]]'` or `[[hello]]` + +### Data Flow + +- Data on the GPU has a reference counter +- When a variable is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. +- When a variable is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory +- When a variable is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented +- When a variable is referred to being detached, it means that the attachment counter is decremented. If attachment counter is zero, then actually detached + +### Creating new/overwriting existing stubs & proxy configs + +
+ GPU_PARALLEL_LOOP (Execute the following loop on the GPU in parallel) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_PARALLEL_LOOP(...)` + +#### Parameters + +> | name | data type | Default Value | description | +> |------------------|---------------------|-------------------|----------------------------------------------------------------------------------------------| +> | `collapse` | integer | None | Number of loops to combine into 1 loop | +> | `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | +> | `default` | string | 'present' | Implicit assumptions compiler should make | +> | `private` | string list | None | Variables that are private to each iteration/thread | +> | `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | +> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +> | `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +> | `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +> | `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | +> | `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-------------|---------------------------------------------------| +| collapse | Must be greater than 1 | +| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| default | 'present' or 'none' | + +#### Additional information + +- default present means that the any non-scalar data in assumed to be present on the GPU +- default none means that the compiler should not implicitly determine the data attributes for any variable +- reduction and reductionOp must match in length +- With `reduction='[[sum1, sum2], [largest]]'` and `reductionOp='[+, max]'`, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations +- A reduction implies a copy, so it does not need to be added for both + +#### Example + +> ```python +> $:GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]') +> $:GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]') +> ``` + +
+ +------------------------------------------------------------------------------------------ + +### Listing existing stubs & proxy configs as YAML string + +
+ GPU_ROUTINE (Compile a procedure for the GPU) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_ROUTINE(...)` + + +#### Parameters + +| name | data type | Default Value | description | +|-----------------|-------------|---------------|--------------------------------------------------------------| +| `function_name` | string | None | Name of subroutine/function | +| `parallelism` | string list | None | Parallelism granularity to use for this routine | +| `nohost` | boolean | False | Do not compile procedure code for CPU | +| `cray_inline` | boolean | False | Inline procedure on cray compiler | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-------------|---------------------------------------------------| +| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | + +#### Additional information + +- Function name only needs to be given when cray_inline is True +- Future capability is to parse function header for function name +- Routine parallelism is most commonly `'[seq]'` + +#### Example + +> ```python +> $:GPU_ROUTINE(parallelism='[seq]') +> $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True) +> ``` + +
+ +
+ GPU_DECLARE (Allocate module variables on GPU or for implicit data region ) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_DECLARE(...)` + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +- An implicit data region is created at the start of each procedure +and ends after the last executable statement in that procedure. +- Use only create, copyin, device_resident or link clauses for module variables +- GPU_DECLARE exit is the end of the implicit data region +- Link is useful for large global static data objects + +#### Example + +> ```python +> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') +> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]') +> ``` + +
+ +
+ GPU_LOOP (Execute loop on GPU) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_LOOP(...)` + +#### Parameters + +| name | data type | Default Value | description | +|-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------| +| `collapse` | integer | None | Number of loops to combine into 1 loop | +| `parallelism` | string list | None | Parallelism granularity to use for this loop | +| `data_dependency` | string | None | 'independent'-> assert loop iterations are independent, 'auto->let compiler analyze dependencies | +| `private` | string list | None | Variables that are private to each iteration/thread | +| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-----------------|---------------------------------------------------| +| collapse | Must be greater than 1 | +| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| data_dependency | 'auto' or 'independent' | + +#### Additional information + +- Loop parallelism is most commonly `'[seq]'` +- reduction and reductionOp must match in length +- With `reduction='[[sum1, sum2], [largest]]'` and `reductionOp='[+, max]'`, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations + +#### Example + +> ```python +> $:GPU_PARALLEL_LOOP(parallelism='[seq]') +> $:GPU_PARALLEL_LOOP(collapse=3, parallelism='[seq]',private='[tmp, r]') +> ``` + +
+ +
+ GPU_DATA (Allocate module variables on GPU or for implicit data region ) + +#### Macro Invocation + +Uses FYPP call directive using `#:call` +> ```C +> +> #:call GPU_DATA(...) +> {code} +> #:endcall GPU_DATA +>``` +> + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +| `code` | code | Required | Region of code where defined data is accessible | +| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `default` | string | None | Implicit assumptions compiler should make | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-----------------|---------------------------------------------------| +| code | Do not assign it manually with key-value pairing | + +#### Additional information + +#### Example + +> ```C +> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, inital_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') +> {code} +> ... +> #:endcall +> #:call GPU_DATA(create='[pixel_arr]', copyin='[inital_index]') +> {code} +> ... +> #:endcall +> ``` + +
+ +
+ GPU_DATA (Allocate module variables on GPU or for implicit data region ) + +#### Macro Invocation + +Uses FYPP call directive using `#:call` +> ```C +> +> #:call GPU_DATA(...) +> {code} +> #:endcall GPU_DATA +>``` +> + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +| `code` | code | Required | Region of code where defined data is accessible | +| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `default` | string | None | Implicit assumptions compiler should make | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-----------------|---------------------------------------------------| +| code | Do not assign it manually with key-value pairing | + +#### Additional information + +#### Example + +> ```C +> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, inital_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') +> {code} +> ... +> #:endcall +> #:call GPU_DATA(create='[pixel_arr]', copyin='[inital_index]') +> {code} +> ... +> #:endcall +> ``` + +
+ + +------------------------------------------------------------------------------------------ From 58994926b106d8462fadf17873a8c249111189c3 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Mon, 23 Jun 2025 21:37:07 -0400 Subject: [PATCH 68/75] more gpu macro docs --- docs/documentation/gpuParallelization.md | 195 ++++++++++++++++++----- 1 file changed, 158 insertions(+), 37 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 34602c42ce..9f4953bcc1 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -54,11 +54,11 @@ Uses FYPP eval directive using `$:` > | `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | > | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | > | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -> | `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -> | `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | -> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -> | `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | -> | `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +> | `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | +> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | > | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | > | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | > | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | @@ -146,11 +146,11 @@ Uses FYPP eval directive using `$:` | name | data type | Default Value | description | |------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | | `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | @@ -217,52 +217,43 @@ Uses FYPP eval directive using `$:`
- GPU_DATA (Allocate module variables on GPU or for implicit data region ) + GPU_HOST_DATA (Make GPU memory address avaliable on CPU) #### Macro Invocation Uses FYPP call directive using `#:call` > ```C > -> #:call GPU_DATA(...) +> #:call GPU_HOST_DATA(...) > {code} -> #:endcall GPU_DATA +> #:endcall GPU_HOST_DATA >``` > #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -| `code` | code | Required | Region of code where defined data is accessible | -| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | -| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | -| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -| `default` | string | None | Implicit assumptions compiler should make | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|----------------|-------------|---------------|------------------------------------------------------------------| +| `code` | code | Required | Region of code where GPU memory addresses is accessible | +| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|-----------------|---------------------------------------------------| -| code | Do not assign it manually with key-value pairing | +| name | Restricted range | +|------|--------------------------------------------------| +| code | Do not assign it manually with key-value pairing | #### Additional information #### Example > ```C -> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, inital_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') +> #:call GPU_HOST_DATA(use_device='[addr1, addr2]') > {code} > ... -> #:endcall -> #:call GPU_DATA(create='[pixel_arr]', copyin='[inital_index]') +> #:endcall GPU_HOST_DATA +> #:call GPU_HOST_DATA(use_device='[display_arr]') > {code} > ... > #:endcall @@ -271,7 +262,7 @@ Uses FYPP call directive using `#:call`
- GPU_DATA (Allocate module variables on GPU or for implicit data region ) + GPU_DATA (Make data accessible on GPU in specified region) #### Macro Invocation @@ -290,10 +281,10 @@ Uses FYPP call directive using `#:call` |------------------|-------------|---------------|----------------------------------------------------------------------------------------------| | `code` | code | Required | Region of code where defined data is accessible | | `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies a variable to GPU on entrance and then deallocated on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates a variable on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates a variable on GPU on entrance and then deallocates on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | @@ -315,7 +306,7 @@ Uses FYPP call directive using `#:call` > #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, inital_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') > {code} > ... -> #:endcall +> #:endcall GPU_DATA > #:call GPU_DATA(create='[pixel_arr]', copyin='[inital_index]') > {code} > ... @@ -324,5 +315,135 @@ Uses FYPP call directive using `#:call`
+
+ GPU_ENTER_DATA (Allocate/move data to GPU until matching GPU_EXIT_DATA or program termination) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_ENTER_DATA(...)` + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `copyin` | string list | None | Allocates and copies data to GPU on entrance | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU on entrance | +| `create` | string list | None | Allocates data on GPU on entrance | +| `attach` | string list | None | Attaches device pointer to device targets on entrance | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, inital_index]') +> $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[inital_index]') +> ``` + +
+ +
+ GPU_EXIT_DATA (Deallocate/move data from GPU created by GPU_ENTER_DATA) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_EXIT_DATA(...)` + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit | +| `delete` | string list | None | Deallocates data on GPU on exit | +| `detach` | string list | None | Detach device pointer from device targets on exit | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, inital_index]') +> $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[inital_index]') +> ``` + +
+ +
+ GPU_CACHE (Data to be cache in software-managed cache) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_CACHE(...)` + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `cache` | string list | Required | Data that should to stored in cache | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_CACHE(cache='[pixels_arr]') +> ``` + +
+ +GPU_ATOMIC(atomic, extraAccArgs=None) + +
+ GPU_ATOMIC (Do an atomic operation on the GPU) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_ATOMIC(...)` + +#### Parameters + +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `atomic` | string | Required | Which atomic operation is performed | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-----------------|---------------------------------------------------| +| atomic | 'read', 'write', 'update', or 'capture' | + +#### Additional information + +- read atomic is reading in a value + - Ex: `v=x` +- write atomic is writing a value to a variable + - Ex:`x=square(tmp)` +- update atomic is updating a variable in-place + - Ex:`x= x .and. 1` +- Capture is a pair of read/write/update operations with one dependent on the other + - Ex: ``` + `x=x .and. 1` + `v=x` + ``` + +#### Example + +> ```python +> $:GPU_CACHE(cache='[update]') +> x = square(x) +> $:GPU_CACHE(cache='[capture]') +> x = square(x) +> v = x +> ``` + +
------------------------------------------------------------------------------------------ From df1a27b1c9100a19f555979f80102b8bacaf4244 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 24 Jun 2025 13:02:01 -0400 Subject: [PATCH 69/75] Finished up gpu docs --- docs/documentation/gpuParallelization.md | 125 ++++++++++++++++------- 1 file changed, 87 insertions(+), 38 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 9f4953bcc1..2b60d17258 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -28,10 +28,10 @@ Note: Ordering is not guarennteed or stable, so use key-value pairing when using ### Data Flow - Data on the GPU has a reference counter -- When a variable is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. -- When a variable is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory -- When a variable is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented -- When a variable is referred to being detached, it means that the attachment counter is decremented. If attachment counter is zero, then actually detached +- When data is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. +- When data is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory +- When data is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented +- When data is referred to being detached, it means that the attachment counter is decremented. If attachment counter is zero, then actually detached ### Creating new/overwriting existing stubs & proxy configs @@ -45,25 +45,25 @@ Uses FYPP eval directive using `$:` #### Parameters -> | name | data type | Default Value | description | -> |------------------|---------------------|-------------------|----------------------------------------------------------------------------------------------| -> | `collapse` | integer | None | Number of loops to combine into 1 loop | -> | `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | -> | `default` | string | 'present' | Implicit assumptions compiler should make | -> | `private` | string list | None | Variables that are private to each iteration/thread | -> | `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | -> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | -> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | -> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -> | `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | -> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| +> | `collapse` | integer | None | Number of loops to combine into 1 loop | +> | `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | +> | `default` | string | 'present' | Implicit assumptions compiler should make | +> | `private` | string list | None | Variables that are private to each iteration/thread | +> | `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | +> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +> | `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | +> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions @@ -217,7 +217,7 @@ Uses FYPP eval directive using `$:`
- GPU_HOST_DATA (Make GPU memory address avaliable on CPU) + GPU_HOST_DATA (Make GPU memory address available on CPU) #### Macro Invocation @@ -303,11 +303,11 @@ Uses FYPP call directive using `#:call` #### Example > ```C -> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, inital_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') +> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, initial_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') > {code} > ... > #:endcall GPU_DATA -> #:call GPU_DATA(create='[pixel_arr]', copyin='[inital_index]') +> #:call GPU_DATA(create='[pixel_arr]', copyin='[initial_index]') > {code} > ... > #:endcall @@ -338,8 +338,8 @@ Uses FYPP eval directive using `$:` #### Example > ```python -> $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, inital_index]') -> $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[inital_index]') +> $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, initial_index]') +> $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[initial_index]') > ```
@@ -366,8 +366,8 @@ Uses FYPP eval directive using `$:` #### Example > ```python -> $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, inital_index]') -> $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[inital_index]') +> $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, initial_index]') +> $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[initial_index]') > ``` @@ -397,8 +397,6 @@ Uses FYPP eval directive using `$:` -GPU_ATOMIC(atomic, extraAccArgs=None) -
GPU_ATOMIC (Do an atomic operation on the GPU) @@ -409,10 +407,10 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|--------------------------------------------------------------| -| `atomic` | string | Required | Which atomic operation is performed | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `atomic` | string | Required | Which atomic operation is performed | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions @@ -438,12 +436,63 @@ Uses FYPP eval directive using `$:` #### Example > ```python -> $:GPU_CACHE(cache='[update]') +> $:GPU_ATOMIC(atomic='update') > x = square(x) -> $:GPU_CACHE(cache='[capture]') +> $:GPU_ATOMIC(atomic='capture') > x = square(x) > v = x > ``` +
+ +
+ GPU_UPDATE (Updates data from CPU to GPU or GPU to CPU) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_UPDATE(...)` + +#### Parameters + +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `host` | string list | None | Updates data from GPU to CPU | +| `device` | string list | None | Updates data from CPU to GPU | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_UPDATE(host='[arr1, arr2]') +> $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]') +> ``` + +
+ +
+ GPU_WAIT (Makes CPU wait for async GPU activities) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_WAIT(...)` + +#### Parameters + +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_WAIT() +> ``` +
------------------------------------------------------------------------------------------ From 9036bfdf25a21fc323298a98afe8cb7bd4771e62 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 24 Jun 2025 13:20:31 -0400 Subject: [PATCH 70/75] Reorganized macros into logical groups --- docs/documentation/gpuParallelization.md | 324 ++++++++++++----------- 1 file changed, 167 insertions(+), 157 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 2b60d17258..75f657c855 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -33,7 +33,9 @@ Note: Ordering is not guarennteed or stable, so use key-value pairing when using - When data is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented - When data is referred to being detached, it means that the attachment counter is decremented. If attachment counter is zero, then actually detached -### Creating new/overwriting existing stubs & proxy configs +------------------------------------------------------------------------------------------ + +### Computation Macros
GPU_PARALLEL_LOOP (Execute the following loop on the GPU in parallel) @@ -69,9 +71,9 @@ Uses FYPP eval directive using `$:` | name | Restricted range | |-------------|---------------------------------------------------| -| collapse | Must be greater than 1 | -| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | -| default | 'present' or 'none' | +| `collapse` | Must be greater than 1 | +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| `default` | 'present' or 'none' | #### Additional information @@ -90,89 +92,6 @@ Uses FYPP eval directive using `$:`
------------------------------------------------------------------------------------------- - -### Listing existing stubs & proxy configs as YAML string - -
- GPU_ROUTINE (Compile a procedure for the GPU) - -#### Macro Invocation - -Uses FYPP eval directive using `$:` -> `$:GPU_ROUTINE(...)` - - -#### Parameters - -| name | data type | Default Value | description | -|-----------------|-------------|---------------|--------------------------------------------------------------| -| `function_name` | string | None | Name of subroutine/function | -| `parallelism` | string list | None | Parallelism granularity to use for this routine | -| `nohost` | boolean | False | Do not compile procedure code for CPU | -| `cray_inline` | boolean | False | Inline procedure on cray compiler | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Parameter Restrictions - -| name | Restricted range | -|-------------|---------------------------------------------------| -| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | - -#### Additional information - -- Function name only needs to be given when cray_inline is True -- Future capability is to parse function header for function name -- Routine parallelism is most commonly `'[seq]'` - -#### Example - -> ```python -> $:GPU_ROUTINE(parallelism='[seq]') -> $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True) -> ``` - -
- -
- GPU_DECLARE (Allocate module variables on GPU or for implicit data region ) - -#### Macro Invocation - -Uses FYPP eval directive using `$:` -> `$:GPU_DECLARE(...)` - -#### Parameters - -| name | data type | Default Value | description | -|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -| `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information - -- An implicit data region is created at the start of each procedure -and ends after the last executable statement in that procedure. -- Use only create, copyin, device_resident or link clauses for module variables -- GPU_DECLARE exit is the end of the implicit data region -- Link is useful for large global static data objects - -#### Example - -> ```python -> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') -> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]') -> ``` - -
-
GPU_LOOP (Execute loop on GPU) @@ -197,9 +116,9 @@ Uses FYPP eval directive using `$:` | name | Restricted range | |-----------------|---------------------------------------------------| -| collapse | Must be greater than 1 | -| parallelism | Valid elements: 'gang', 'worker', 'vector', 'seq' | -| data_dependency | 'auto' or 'independent' | +| `collapse` | Must be greater than 1 | +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| `data_dependency` | 'auto' or 'independent' | #### Additional information @@ -216,50 +135,9 @@ Uses FYPP eval directive using `$:`
-
- GPU_HOST_DATA (Make GPU memory address available on CPU) - -#### Macro Invocation - -Uses FYPP call directive using `#:call` -> ```C -> -> #:call GPU_HOST_DATA(...) -> {code} -> #:endcall GPU_HOST_DATA ->``` -> - -#### Parameters - -| name | data type | Default Value | description | -|----------------|-------------|---------------|------------------------------------------------------------------| -| `code` | code | Required | Region of code where GPU memory addresses is accessible | -| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Parameter Restrictions - -| name | Restricted range | -|------|--------------------------------------------------| -| code | Do not assign it manually with key-value pairing | - -#### Additional information - -#### Example - -> ```C -> #:call GPU_HOST_DATA(use_device='[addr1, addr2]') -> {code} -> ... -> #:endcall GPU_HOST_DATA -> #:call GPU_HOST_DATA(use_device='[display_arr]') -> {code} -> ... -> #:endcall -> ``` +------------------------------------------------------------------------------------------ -
+### Data Control Macros
GPU_DATA (Make data accessible on GPU in specified region) @@ -296,7 +174,7 @@ Uses FYPP call directive using `#:call` | name | Restricted range | |-----------------|---------------------------------------------------| -| code | Do not assign it manually with key-value pairing | +| `code` | Do not assign it manually with key-value pairing | #### Additional information @@ -373,26 +251,140 @@ Uses FYPP eval directive using `$:`
- GPU_CACHE (Data to be cache in software-managed cache) + GPU_DECLARE (Allocate module variables on GPU or for implicit data region ) #### Macro Invocation Uses FYPP eval directive using `$:` -> `$:GPU_CACHE(...)` +> `$:GPU_DECLARE(...)` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|--------------------------------------------------------------| -| `cache` | string list | Required | Data that should to stored in cache | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +- An implicit data region is created at the start of each procedure +and ends after the last executable statement in that procedure. +- Use only create, copyin, device_resident or link clauses for module variables +- GPU_DECLARE exit is the end of the implicit data region +- Link is useful for large global static data objects + +#### Example + +> ```python +> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') +> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]') +> ``` + +
+ +
+ GPU_UPDATE (Updates data from CPU to GPU or GPU to CPU) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_UPDATE(...)` + +#### Parameters + +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `host` | string list | None | Updates data from GPU to CPU | +| `device` | string list | None | Updates data from CPU to GPU | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Additional information #### Example > ```python -> $:GPU_CACHE(cache='[pixels_arr]') +> $:GPU_UPDATE(host='[arr1, arr2]') +> $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]') +> ``` + +
+ +
+ GPU_HOST_DATA (Make GPU memory address available on CPU) + +#### Macro Invocation + +Uses FYPP call directive using `#:call` +> ```C +> +> #:call GPU_HOST_DATA(...) +> {code} +> #:endcall GPU_HOST_DATA +>``` +> + +#### Parameters + +| name | data type | Default Value | description | +|----------------|-------------|---------------|------------------------------------------------------------------| +| `code` | code | Required | Region of code where GPU memory addresses is accessible | +| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|------|--------------------------------------------------| +| `code` | Do not assign it manually with key-value pairing | + +#### Additional information + +#### Example + +> ```C +> #:call GPU_HOST_DATA(use_device='[addr1, addr2]') +> {code} +> ... +> #:endcall GPU_HOST_DATA +> #:call GPU_HOST_DATA(use_device='[display_arr]') +> {code} +> ... +> #:endcall +> ``` + +
+ +------------------------------------------------------------------------------------------ + +### Synchronization Macros + +
+ GPU_WAIT (Makes CPU wait for async GPU activities) + +#### Macro Invocation + +Uses FYPP eval directive using `$:` +> `$:GPU_WAIT(...)` + +#### Parameters + +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Additional information + +#### Example + +> ```python +> $:GPU_WAIT() > ```
@@ -416,7 +408,7 @@ Uses FYPP eval directive using `$:` | name | Restricted range | |-----------------|---------------------------------------------------| -| atomic | 'read', 'write', 'update', or 'capture' | +| `atomic` | 'read', 'write', 'update', or 'capture' | #### Additional information @@ -445,54 +437,72 @@ Uses FYPP eval directive using `$:` +------------------------------------------------------------------------------------------ + +### Miscellaneous Macros +
- GPU_UPDATE (Updates data from CPU to GPU or GPU to CPU) + GPU_ROUTINE (Compile a procedure for the GPU) #### Macro Invocation Uses FYPP eval directive using `$:` -> `$:GPU_UPDATE(...)` +> `$:GPU_ROUTINE(...)` #### Parameters -| name | data type | Default Value | description | -|----------------|-----------|---------------|--------------------------------------------------------------| -| `host` | string list | None | Updates data from GPU to CPU | -| `device` | string list | None | Updates data from CPU to GPU | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|-----------------|-------------|---------------|--------------------------------------------------------------| +| `function_name` | string | None | Name of subroutine/function | +| `parallelism` | string list | None | Parallelism granularity to use for this routine | +| `nohost` | boolean | False | Do not compile procedure code for CPU | +| `cray_inline` | boolean | False | Inline procedure on cray compiler | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +#### Parameter Restrictions + +| name | Restricted range | +|-------------|---------------------------------------------------| +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | #### Additional information +- Function name only needs to be given when cray_inline is True +- Future capability is to parse function header for function name +- Routine parallelism is most commonly `'[seq]'` + #### Example > ```python -> $:GPU_UPDATE(host='[arr1, arr2]') -> $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]') +> $:GPU_ROUTINE(parallelism='[seq]') +> $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True) > ```
- GPU_WAIT (Makes CPU wait for async GPU activities) + GPU_CACHE (Data to be cache in software-managed cache) #### Macro Invocation Uses FYPP eval directive using `$:` -> `$:GPU_WAIT(...)` +> `$:GPU_CACHE(...)` #### Parameters -| name | data type | Default Value | description | -|----------------|-----------|---------------|--------------------------------------------------------------| -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `cache` | string list | Required | Data that should to stored in cache | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Additional information #### Example > ```python -> $:GPU_WAIT() +> $:GPU_CACHE(cache='[pixels_arr]') > ```
+ ------------------------------------------------------------------------------------------ From 7a8cdc2475d539c44cab8ea2021b011f129ca26f Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 24 Jun 2025 13:37:16 -0400 Subject: [PATCH 71/75] Cleaned up docs --- docs/documentation/gpuParallelization.md | 234 +++++++++++------------ 1 file changed, 110 insertions(+), 124 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 75f657c855..9c4a783a72 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -69,11 +69,11 @@ Uses FYPP eval directive using `$:` #### Parameter Restrictions -| name | Restricted range | -|-------------|---------------------------------------------------| -| `collapse` | Must be greater than 1 | -| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | -| `default` | 'present' or 'none' | +> | name | Restricted range | +> |---------------|---------------------------------------------------| +> | `collapse` | Must be greater than 1 | +> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +> | `default` | 'present' or 'none' | #### Additional information @@ -102,23 +102,23 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------| -| `collapse` | integer | None | Number of loops to combine into 1 loop | -| `parallelism` | string list | None | Parallelism granularity to use for this loop | -| `data_dependency` | string | None | 'independent'-> assert loop iterations are independent, 'auto->let compiler analyze dependencies | -| `private` | string list | None | Variables that are private to each iteration/thread | -| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | -| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------| +> | `collapse` | integer | None | Number of loops to combine into 1 loop | +> | `parallelism` | string list | None | Parallelism granularity to use for this loop | +> | `data_dependency` | string | None | 'independent'-> assert loop iterations are independent, 'auto->let compiler analyze dependencies | +> | `private` | string list | None | Variables that are private to each iteration/thread | +> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|-----------------|---------------------------------------------------| -| `collapse` | Must be greater than 1 | -| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | -| `data_dependency` | 'auto' or 'independent' | +> | name | Restricted range | +> |-------------------|---------------------------------------------------| +> | `collapse` | Must be greater than 1 | +> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +> | `data_dependency` | 'auto' or 'independent' | #### Additional information @@ -145,38 +145,35 @@ Uses FYPP eval directive using `$:` #### Macro Invocation Uses FYPP call directive using `#:call` + > ```C -> > #:call GPU_DATA(...) > {code} > #:endcall GPU_DATA >``` -> #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -| `code` | code | Required | Region of code where defined data is accessible | -| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -| `default` | string | None | Implicit assumptions compiler should make | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +> | `code` | code | Required | Region of code where defined data is accessible | +> | `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +> | `default` | string | None | Implicit assumptions compiler should make | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|-----------------|---------------------------------------------------| -| `code` | Do not assign it manually with key-value pairing | - -#### Additional information +> | name | Restricted range | +> |--------|--------------------------------------------------| +> | `code` | Do not assign it manually with key-value pairing | #### Example @@ -188,7 +185,7 @@ Uses FYPP call directive using `#:call` > #:call GPU_DATA(create='[pixel_arr]', copyin='[initial_index]') > {code} > ... -> #:endcall +> #:endcall > ``` @@ -203,15 +200,13 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|--------------------------------------------------------------| -| `copyin` | string list | None | Allocates and copies data to GPU on entrance | -| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU on entrance | -| `create` | string list | None | Allocates data on GPU on entrance | -| `attach` | string list | None | Attaches device pointer to device targets on entrance | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information +> | name | data type | Default Value | description | +> |------------------|-------------|---------------|--------------------------------------------------------------| +> | `copyin` | string list | None | Allocates and copies data to GPU on entrance | +> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU on entrance | +> | `create` | string list | None | Allocates data on GPU on entrance | +> | `attach` | string list | None | Attaches device pointer to device targets on entrance | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Example @@ -232,14 +227,12 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|--------------------------------------------------------------| -| `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit | -| `delete` | string list | None | Deallocates data on GPU on exit | -| `detach` | string list | None | Detach device pointer from device targets on exit | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information +> | name | data type | Default Value | description | +> |----------------|-------------|---------------|--------------------------------------------------------------| +> | `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit | +> | `delete` | string list | None | Deallocates data on GPU on exit | +> | `detach` | string list | None | Detach device pointer from device targets on exit | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Example @@ -260,22 +253,21 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | -| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -| `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |------------------|-------------|---------------|-------------------------------------------------------------------------------------------| +> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +> | `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Additional information -- An implicit data region is created at the start of each procedure -and ends after the last executable statement in that procedure. +- An implicit data region is created at the start of each procedure and ends after the last executable statement in that procedure. - Use only create, copyin, device_resident or link clauses for module variables - GPU_DECLARE exit is the end of the implicit data region - Link is useful for large global static data objects @@ -299,13 +291,11 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|----------------|-----------|---------------|--------------------------------------------------------------| -| `host` | string list | None | Updates data from GPU to CPU | -| `device` | string list | None | Updates data from CPU to GPU | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information +> | name | data type | Default Value | description | +> |----------------|-------------|---------------|--------------------------------------------------------------| +> | `host` | string list | None | Updates data from GPU to CPU | +> | `device` | string list | None | Updates data from CPU to GPU | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Example @@ -322,27 +312,26 @@ Uses FYPP eval directive using `$:` #### Macro Invocation Uses FYPP call directive using `#:call` + > ```C -> > #:call GPU_HOST_DATA(...) > {code} > #:endcall GPU_HOST_DATA >``` -> #### Parameters -| name | data type | Default Value | description | -|----------------|-------------|---------------|------------------------------------------------------------------| -| `code` | code | Required | Region of code where GPU memory addresses is accessible | -| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |----------------|-------------|---------------|------------------------------------------------------------------| +> | `code` | code | Required | Region of code where GPU memory addresses is accessible | +> | `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|------|--------------------------------------------------| -| `code` | Do not assign it manually with key-value pairing | +> | name | Restricted range | +> |--------|--------------------------------------------------| +> | `code` | Do not assign it manually with key-value pairing | #### Additional information @@ -375,11 +364,9 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|----------------|-----------|---------------|--------------------------------------------------------------| -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information +> | name | data type | Default Value | description | +> |----------------|-----------|---------------|--------------------------------------------------------------| +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Example @@ -399,31 +386,32 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|----------------|-----------|---------------|--------------------------------------------------------------| -| `atomic` | string | Required | Which atomic operation is performed | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |----------------|-----------|---------------|--------------------------------------------------------------| +> | `atomic` | string | Required | Which atomic operation is performed | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|-----------------|---------------------------------------------------| -| `atomic` | 'read', 'write', 'update', or 'capture' | +> | name | Restricted range | +> |----------|-----------------------------------------| +> | `atomic` | 'read', 'write', 'update', or 'capture' | #### Additional information - read atomic is reading in a value - - Ex: `v=x` + - Ex: `v=x` - write atomic is writing a value to a variable - - Ex:`x=square(tmp)` + - Ex:`x=square(tmp)` - update atomic is updating a variable in-place - - Ex:`x= x .and. 1` + - Ex:`x= x .and. 1` - Capture is a pair of read/write/update operations with one dependent on the other - - Ex: ``` - `x=x .and. 1` - `v=x` - ``` - + - Ex: + + ```Fortran + x=x .and. 1 + v=x + ``` #### Example @@ -451,19 +439,19 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|-----------------|-------------|---------------|--------------------------------------------------------------| -| `function_name` | string | None | Name of subroutine/function | -| `parallelism` | string list | None | Parallelism granularity to use for this routine | -| `nohost` | boolean | False | Do not compile procedure code for CPU | -| `cray_inline` | boolean | False | Inline procedure on cray compiler | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +> | name | data type | Default Value | description | +> |-----------------|-------------|---------------|--------------------------------------------------------------| +> | `function_name` | string | None | Name of subroutine/function | +> | `parallelism` | string list | None | Parallelism granularity to use for this routine | +> | `nohost` | boolean | False | Do not compile procedure code for CPU | +> | `cray_inline` | boolean | False | Inline procedure on cray compiler | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Parameter Restrictions -| name | Restricted range | -|-------------|---------------------------------------------------| -| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +> | name | Restricted range | +> |---------------|---------------------------------------------------| +> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | #### Additional information @@ -490,12 +478,10 @@ Uses FYPP eval directive using `$:` #### Parameters -| name | data type | Default Value | description | -|------------------|-------------|---------------|--------------------------------------------------------------| -| `cache` | string list | Required | Data that should to stored in cache | -| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Additional information +> | name | data type | Default Value | description | +> |------------------|-------------|---------------|--------------------------------------------------------------| +> | `cache` | string list | Required | Data that should to stored in cache | +> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | #### Example From 6bc96a8a6395845de49d80d746d807501152a3a4 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Tue, 24 Jun 2025 15:45:08 -0400 Subject: [PATCH 72/75] Fixes render issues with doxygen --- docs/documentation/gpuParallelization.md | 566 ++++++++++++----------- 1 file changed, 288 insertions(+), 278 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 9c4a783a72..bb0fbc667d 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -1,17 +1,19 @@ -# Parallelization via GPUs +# GPU Parallelization MFC compiles GPU code via OpenACC and in the future OpenMP as well. -In order to swap between OpenACC and OpenMP, ACC and MP directives are implemented via FYPP macros. +In order to swap between OpenACC and OpenMP, custom GPU macros are used that translate to equivalent OpenACC and OpenMP directives. +FYPP is used to process the GPU macros. [OpenACC Quick start Guide](https://openacc-best-practices-guide.readthedocs.io/en/latest/01-Introduction.html) + [OpenACC API Documentation](https://www.openacc.org/sites/default/files/inline-files/API%20Guide%202.7.pdf) ------------------------------------------------------------------------------------------ ## Macro API Documentation -Note: Ordering is not guarennteed or stable, so use key-value pairing when using macros +Note: Ordering is not guaranteed or stable, so use key-value pairing when using macros ### Data Type Meanings @@ -20,10 +22,10 @@ Note: Ordering is not guarennteed or stable, so use key-value pairing when using - Boolean is a pythonic boolean - Valid options: `True` or `False` - String List is given as a comma separated list surrounding by brackets and inside quotations - - Ex: `'[hello, world, Fortran]'` + - Ex: ``'[hello, world, Fortran]'`` - 2-level string list is given as a comma separated list of string lists surrounding by brackets and inside quotations - - Ex: `'[[hello, world], [Fortran, MFC]]'` or `[[hello]]` + - Ex: ``'[[hello, world], [Fortran, MFC]]'`` or ``'[[hello]]'`` ### Data Flow @@ -38,100 +40,102 @@ Note: Ordering is not guarennteed or stable, so use key-value pairing when using ### Computation Macros
- GPU_PARALLEL_LOOP (Execute the following loop on the GPU in parallel) + GPU_PARALLEL_LOOP -- (Execute the following loop on the GPU in parallel) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_PARALLEL_LOOP(...)` - -#### Parameters - -> | name | data type | Default Value | description | -> |------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| -> | `collapse` | integer | None | Number of loops to combine into 1 loop | -> | `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | -> | `default` | string | 'present' | Implicit assumptions compiler should make | -> | `private` | string list | None | Variables that are private to each iteration/thread | -> | `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | -> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | -> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | -> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -> | `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | -> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Parameter Restrictions - -> | name | Restricted range | -> |---------------|---------------------------------------------------| -> | `collapse` | Must be greater than 1 | -> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | -> | `default` | 'present' or 'none' | - -#### Additional information + +`$:GPU_PARALLEL_LOOP(...)` + +**Parameters** + +| name | data type | Default Value | description | +|------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| +| `collapse` | integer | None | Number of loops to combine into 1 loop | +| `parallelism` | string list | '\[gang,vector\]' | Parallelism granularity to use for this loop | +| `default` | string | 'present' | Implicit assumptions compiler should make | +| `private` | string list | None | Variables that are private to each iteration/thread | +| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | +| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +**Parameter Restrictions** + +| name | Restricted range | +|---------------|---------------------------------------------------| +| `collapse` | Must be greater than 1 | +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| `default` | 'present' or 'none' | + +**Additional information** - default present means that the any non-scalar data in assumed to be present on the GPU - default none means that the compiler should not implicitly determine the data attributes for any variable - reduction and reductionOp must match in length -- With `reduction='[[sum1, sum2], [largest]]'` and `reductionOp='[+, max]'`, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations +- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations - A reduction implies a copy, so it does not need to be added for both -#### Example +**Example** -> ```python -> $:GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]') -> $:GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]') -> ``` +```python + $:GPU_PARALLEL_LOOP(collapse=3, private='[tmp, r]', reduction='[[vol, avg], [max_val]]', reductionOp='[+, MAX]') + $:GPU_PARALLEL_LOOP(collapse=2, private='[sum_holder]', copyin='[starting_sum]', copyout='[eigenval,C]') +```
- GPU_LOOP (Execute loop on GPU) + GPU_LOOP -- (Execute loop on GPU) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_LOOP(...)` -#### Parameters +`$:GPU_LOOP(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------| -> | `collapse` | integer | None | Number of loops to combine into 1 loop | -> | `parallelism` | string list | None | Parallelism granularity to use for this loop | -> | `data_dependency` | string | None | 'independent'-> assert loop iterations are independent, 'auto->let compiler analyze dependencies | -> | `private` | string list | None | Variables that are private to each iteration/thread | -> | `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | -> | `reductionOp` | string list | None | Operator that each list of reduction will reduce with | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|-------------------|---------------------|---------------|--------------------------------------------------------------------------------------------------| +| `collapse` | integer | None | Number of loops to combine into 1 loop | +| `parallelism` | string list | None | Parallelism granularity to use for this loop | +| `data_dependency` | string | None | 'independent'-> assert loop iterations are independent, 'auto->let compiler analyze dependencies | +| `private` | string list | None | Variables that are private to each iteration/thread | +| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Parameter Restrictions +**Parameter Restrictions** -> | name | Restricted range | -> |-------------------|---------------------------------------------------| -> | `collapse` | Must be greater than 1 | -> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | -> | `data_dependency` | 'auto' or 'independent' | +| name | Restricted range | +|-------------------|---------------------------------------------------| +| `collapse` | Must be greater than 1 | +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| `data_dependency` | 'auto' or 'independent' | -#### Additional information +**Additional information** -- Loop parallelism is most commonly `'[seq]'` +- Loop parallelism is most commonly ``'[seq]'`` - reduction and reductionOp must match in length -- With `reduction='[[sum1, sum2], [largest]]'` and `reductionOp='[+, max]'`, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations +- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations -#### Example +**Example** -> ```python -> $:GPU_PARALLEL_LOOP(parallelism='[seq]') -> $:GPU_PARALLEL_LOOP(collapse=3, parallelism='[seq]',private='[tmp, r]') -> ``` +```python + $:GPU_LOOP(parallelism='[seq]') + $:GPU_LOOP(collapse=3, parallelism='[seq]',private='[tmp, r]') +```
@@ -140,213 +144,215 @@ Uses FYPP eval directive using `$:` ### Data Control Macros
- GPU_DATA (Make data accessible on GPU in specified region) + GPU_DATA -- (Make data accessible on GPU in specified region) -#### Macro Invocation +**Macro Invocation** Uses FYPP call directive using `#:call` -> ```C -> #:call GPU_DATA(...) -> {code} -> #:endcall GPU_DATA ->``` - -#### Parameters - -> | name | data type | Default Value | description | -> |------------------|-------------|---------------|----------------------------------------------------------------------------------------------| -> | `code` | code | Required | Region of code where defined data is accessible | -> | `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | -> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -> | `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | -> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -> | `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | -> | `default` | string | None | Implicit assumptions compiler should make | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | - -#### Parameter Restrictions - -> | name | Restricted range | -> |--------|--------------------------------------------------| -> | `code` | Do not assign it manually with key-value pairing | - -#### Example - -> ```C -> #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, initial_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') -> {code} -> ... -> #:endcall GPU_DATA -> #:call GPU_DATA(create='[pixel_arr]', copyin='[initial_index]') -> {code} -> ... -> #:endcall -> ``` +```C +#:call GPU_DATA(...) + {code} +#:endcall GPU_DATA +``` + +**Parameters** + +| name | data type | Default Value | description | +|------------------|-------------|---------------|----------------------------------------------------------------------------------------------| +| `code` | code | Required | Region of code where defined data is accessible | +| `copy` | string list | None | Allocates and copies variable to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `default` | string | None | Implicit assumptions compiler should make | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +**Parameter Restrictions** + +| name | Restricted range | +|--------|--------------------------------------------------| +| `code` | Do not assign it manually with key-value pairing | + +**Example** + +```C + #:call GPU_DATA(copy='[pixel_arr]', copyin='[starting_pixels, initial_index]',attach='[p_real, p_cmplx, p_fltr_cmplx]') + {code} + ... + #:endcall GPU_DATA + #:call GPU_DATA(create='[pixel_arr]', copyin='[initial_index]') + {code} + ... + #:endcall +```
- GPU_ENTER_DATA (Allocate/move data to GPU until matching GPU_EXIT_DATA or program termination) + GPU_ENTER_DATA -- (Allocate/move data to GPU until matching GPU_EXIT_DATA or program termination) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_ENTER_DATA(...)` -#### Parameters +`$:GPU_ENTER_DATA(...)` -> | name | data type | Default Value | description | -> |------------------|-------------|---------------|--------------------------------------------------------------| -> | `copyin` | string list | None | Allocates and copies data to GPU on entrance | -> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU on entrance | -> | `create` | string list | None | Allocates data on GPU on entrance | -> | `attach` | string list | None | Attaches device pointer to device targets on entrance | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +**Parameter** -#### Example +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `copyin` | string list | None | Allocates and copies data to GPU on entrance | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU on entrance | +| `create` | string list | None | Allocates data on GPU on entrance | +| `attach` | string list | None | Attaches device pointer to device targets on entrance | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -> ```python -> $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, initial_index]') -> $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[initial_index]') -> ``` +**Example** + +```python + $:GPU_ENTER_DATA(copyin='[pixels_arr]', copyinReadOnly='[starting_pixels, initial_index]') + $:GPU_ENTER_DATA(create='[bc_buffers(1:num_dims, -1:1)]', copyin='[initial_index]') +```
- GPU_EXIT_DATA (Deallocate/move data from GPU created by GPU_ENTER_DATA) + GPU_EXIT_DATA -- (Deallocate/move data from GPU created by GPU_ENTER_DATA) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_EXIT_DATA(...)` -#### Parameters +`$:GPU_EXIT_DATA(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |----------------|-------------|---------------|--------------------------------------------------------------| -> | `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit | -> | `delete` | string list | None | Deallocates data on GPU on exit | -> | `detach` | string list | None | Detach device pointer from device targets on exit | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|----------------|-------------|---------------|--------------------------------------------------------------| +| `copyout` | string list | None | Deallocates and copies data from GPU to CPU on exit | +| `delete` | string list | None | Deallocates data on GPU on exit | +| `detach` | string list | None | Detach device pointer from device targets on exit | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Example +**Example** -> ```python -> $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, initial_index]') -> $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[initial_index]') -> ``` +```python + $:GPU_EXIT_DATA(copyout='[pixels_arr]', delete='[starting_pixels, initial_index]') + $:GPU_EXIT_DATA(delete='[bc_buffers(1:num_dims, -1:1)]', copyout='[initial_index]') +```
- GPU_DECLARE (Allocate module variables on GPU or for implicit data region ) + GPU_DECLARE -- (Allocate module variables on GPU or for implicit data region ) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_DECLARE(...)` -#### Parameters +`$:GPU_DECLARE(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |------------------|-------------|---------------|-------------------------------------------------------------------------------------------| -> | `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | -> | `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | -> | `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | -> | `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | -> | `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | -> | `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | -> | `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | -> | `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|------------------|-------------|---------------|-------------------------------------------------------------------------------------------| +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies a readonly variable to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `link` | string list | None | Declare global link, and only allocate when variable used in data clause. | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Additional information +**Additional information** - An implicit data region is created at the start of each procedure and ends after the last executable statement in that procedure. - Use only create, copyin, device_resident or link clauses for module variables - GPU_DECLARE exit is the end of the implicit data region - Link is useful for large global static data objects -#### Example +**Example** -> ```python -> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') -> $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]') -> ``` +```python + $:GPU_DECLARE(create='[x_cb,y_cb,z_cb,x_cc,y_cc,z_cc,dx,dy,dz,dt,m,n,p]') + $:GPU_DECLARE(create='[x_cb,y_cb,z_cb]', copyin='[x_cc,y_cc,z_cc]', link='[dx,dy,dz,dt,m,n,p]') +```
- GPU_UPDATE (Updates data from CPU to GPU or GPU to CPU) + GPU_UPDATE -- (Updates data from CPU to GPU or GPU to CPU) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_UPDATE(...)` -#### Parameters +`$:GPU_UPDATE(...)` -> | name | data type | Default Value | description | -> |----------------|-------------|---------------|--------------------------------------------------------------| -> | `host` | string list | None | Updates data from GPU to CPU | -> | `device` | string list | None | Updates data from CPU to GPU | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +**Parameters** -#### Example +| name | data type | Default Value | description | +|----------------|-------------|---------------|--------------------------------------------------------------| +| `host` | string list | None | Updates data from GPU to CPU | +| `device` | string list | None | Updates data from CPU to GPU | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -> ```python -> $:GPU_UPDATE(host='[arr1, arr2]') -> $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]') -> ``` +**Example** + +```python + $:GPU_UPDATE(host='[arr1, arr2]') + $:GPU_UPDATE(host='[updated_gpu_val]', device='[updated_cpu_val]') +```
- GPU_HOST_DATA (Make GPU memory address available on CPU) + GPU_HOST_DATA -- (Make GPU memory address available on CPU) -#### Macro Invocation +**Macro Invocation** Uses FYPP call directive using `#:call` -> ```C -> #:call GPU_HOST_DATA(...) -> {code} -> #:endcall GPU_HOST_DATA ->``` - -#### Parameters +```C + #:call GPU_HOST_DATA(...) + {code} + #:endcall GPU_HOST_DATA +``` -> | name | data type | Default Value | description | -> |----------------|-------------|---------------|------------------------------------------------------------------| -> | `code` | code | Required | Region of code where GPU memory addresses is accessible | -> | `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +**Parameters** -#### Parameter Restrictions +| name | data type | Default Value | description | +|----------------|-------------|---------------|------------------------------------------------------------------| +| `code` | code | Required | Region of code where GPU memory addresses is accessible | +| `use_device` | string list | None | Use GPU memory address of variable instead of CPU memory address | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -> | name | Restricted range | -> |--------|--------------------------------------------------| -> | `code` | Do not assign it manually with key-value pairing | +**Parameter Restrictions** -#### Additional information +| name | Restricted range | +|--------|--------------------------------------------------| +| `code` | Do not assign it manually with key-value pairing | -#### Example +**Example** -> ```C -> #:call GPU_HOST_DATA(use_device='[addr1, addr2]') -> {code} -> ... -> #:endcall GPU_HOST_DATA -> #:call GPU_HOST_DATA(use_device='[display_arr]') -> {code} -> ... -> #:endcall -> ``` +```C + #:call GPU_HOST_DATA(use_device='[addr1, addr2]') + {code} + ... + #:endcall GPU_HOST_DATA + #:call GPU_HOST_DATA(use_device='[display_arr]') + {code} + ... + #:endcall +```
@@ -355,49 +361,51 @@ Uses FYPP call directive using `#:call` ### Synchronization Macros
- GPU_WAIT (Makes CPU wait for async GPU activities) + GPU_WAIT -- (Makes CPU wait for async GPU activities) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_WAIT(...)` -#### Parameters +`$:GPU_WAIT(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |----------------|-----------|---------------|--------------------------------------------------------------| -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Example +**Example** -> ```python -> $:GPU_WAIT() -> ``` +```python + $:GPU_WAIT() +```
- GPU_ATOMIC (Do an atomic operation on the GPU) + GPU_ATOMIC -- (Do an atomic operation on the GPU) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_ATOMIC(...)` -#### Parameters +`$:GPU_ATOMIC(...)` -> | name | data type | Default Value | description | -> |----------------|-----------|---------------|--------------------------------------------------------------| -> | `atomic` | string | Required | Which atomic operation is performed | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +**Parameters** -#### Parameter Restrictions +| name | data type | Default Value | description | +|----------------|-----------|---------------|--------------------------------------------------------------| +| `atomic` | string | Required | Which atomic operation is performed | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -> | name | Restricted range | -> |----------|-----------------------------------------| -> | `atomic` | 'read', 'write', 'update', or 'capture' | +**Parameter Restrictions** -#### Additional information +| name | Restricted range | +|----------|-----------------------------------------| +| `atomic` | 'read', 'write', 'update', or 'capture' | + +**Additional information** - read atomic is reading in a value - Ex: `v=x` @@ -413,15 +421,15 @@ Uses FYPP eval directive using `$:` v=x ``` -#### Example +**Example** -> ```python -> $:GPU_ATOMIC(atomic='update') -> x = square(x) -> $:GPU_ATOMIC(atomic='capture') -> x = square(x) -> v = x -> ``` +```python + $:GPU_ATOMIC(atomic='update') + x = square(x) + $:GPU_ATOMIC(atomic='capture') + x = square(x) + v = x +```
@@ -430,64 +438,66 @@ Uses FYPP eval directive using `$:` ### Miscellaneous Macros
- GPU_ROUTINE (Compile a procedure for the GPU) + GPU_ROUTINE -- (Compile a procedure for the GPU) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_ROUTINE(...)` -#### Parameters +`$:GPU_ROUTINE(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |-----------------|-------------|---------------|--------------------------------------------------------------| -> | `function_name` | string | None | Name of subroutine/function | -> | `parallelism` | string list | None | Parallelism granularity to use for this routine | -> | `nohost` | boolean | False | Do not compile procedure code for CPU | -> | `cray_inline` | boolean | False | Inline procedure on cray compiler | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|-----------------|-------------|---------------|--------------------------------------------------------------| +| `function_name` | string | None | Name of subroutine/function | +| `parallelism` | string list | None | Parallelism granularity to use for this routine | +| `nohost` | boolean | False | Do not compile procedure code for CPU | +| `cray_inline` | boolean | False | Inline procedure on cray compiler | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Parameter Restrictions +**Parameter Restrictions** -> | name | Restricted range | -> |---------------|---------------------------------------------------| -> | `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | +| name | Restricted range | +|---------------|---------------------------------------------------| +| `parallelism` | Valid elements: 'gang', 'worker', 'vector', 'seq' | -#### Additional information +**Additional information** - Function name only needs to be given when cray_inline is True - Future capability is to parse function header for function name -- Routine parallelism is most commonly `'[seq]'` +- Routine parallelism is most commonly ``'[seq]'`` -#### Example +**Example** -> ```python -> $:GPU_ROUTINE(parallelism='[seq]') -> $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True) -> ``` +```python + $:GPU_ROUTINE(parallelism='[seq]') + $:GPU_ROUTINE(function_name='s_matmult', parallelism='[seq]', cray_inline=True) +```
- GPU_CACHE (Data to be cache in software-managed cache) + GPU_CACHE -- (Data to be cache in software-managed cache) -#### Macro Invocation +**Macro Invocation** Uses FYPP eval directive using `$:` -> `$:GPU_CACHE(...)` -#### Parameters +`$:GPU_CACHE(...)` + +**Parameters** -> | name | data type | Default Value | description | -> |------------------|-------------|---------------|--------------------------------------------------------------| -> | `cache` | string list | Required | Data that should to stored in cache | -> | `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | +| name | data type | Default Value | description | +|------------------|-------------|---------------|--------------------------------------------------------------| +| `cache` | string list | Required | Data that should to stored in cache | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | -#### Example +**Example** -> ```python -> $:GPU_CACHE(cache='[pixels_arr]') -> ``` +```python + $:GPU_CACHE(cache='[pixels_arr]') +```
From f976b66cc1c3cfd2f60cb816e220dee6ddc0f923 Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 2 Jul 2025 14:01:02 -0400 Subject: [PATCH 73/75] Added GPU_PARALLEL and replaced rest of acc directives --- docs/documentation/gpuParallelization.md | 62 +++++++++++++++++++ src/common/include/parallel_macros.fpp | 28 +++++++++ src/common/m_mpi_common.fpp | 79 +++++++++++++----------- src/simulation/m_data_output.fpp | 9 ++- src/simulation/m_riemann_solvers.fpp | 10 +-- src/simulation/m_sim_helpers.fpp | 4 +- src/simulation/m_time_steppers.fpp | 4 +- 7 files changed, 145 insertions(+), 51 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index bb0fbc667d..8579914485 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -139,6 +139,68 @@ Uses FYPP eval directive using `$:` +
+ GPU_PARALLEL -- (Execute the following on the GPU in parallel) + +**Macro Invocation** + +Uses FYPP call directive using `#:call` + +```C +#:call GPU_PARALLEL(...) + {code} +#:endcall GPU_PARALLEL +``` + +**Parameters** + +| name | data type | Default Value | description | +|------------------|---------------------|-------------------|-------------------------------------------------------------------------------------------| +| `default` | string | 'present' | Implicit assumptions compiler should make | +| `private` | string list | None | Variables that are private to each iteration/thread | +| `firstprivate` | string list | None | Initialized variables that are private to each iteration/thread | +| `reduction` | 2-level string list | None | Variables unique to each iteration and reduced at the end | +| `reductionOp` | string list | None | Operator that each list of reduction will reduce with | +| `copy` | string list | None | Allocates and copies data to GPU on entrance, then deallocated and copies to CPU on exit | +| `copyin` | string list | None | Allocates and copies data to GPU on entrance and then deallocated on exit | +| `copyinReadOnly` | string list | None | Allocates and copies readonly data to GPU and then deallocated on exit | +| `copyout` | string list | None | Allocates data on GPU on entrance and then deallocates and copies to CPU on exit | +| `create` | string list | None | Allocates data on GPU on entrance and then deallocates on exit | +| `no_create` | string list | None | Use data in CPU memory unless data is already in GPU memory | +| `present` | string list | None | Data that must be present in GPU memory. Increment counter on entrance, decrement on exit | +| `deviceptr` | string list | None | Pointer variables that are already allocated on GPU memory | +| `attach` | string list | None | Attaches device pointer to device targets on entrance, then detach on exit | +| `extraAccArgs` | string | None | String of any extra arguments added to the OpenACC directive | + +**Parameter Restrictions** + +| name | Restricted range | +|---------------|---------------------------------------------------| +| `default` | 'present' or 'none' | + +**Additional information** + +- default present means that the any non-scalar data in assumed to be present on the GPU +- default none means that the compiler should not implicitly determine the data attributes for any variable +- reduction and reductionOp must match in length +- With ``reduction='[[sum1, sum2], [largest]]'`` and ``reductionOp='[+, max]'``, `sum1` and `sum2` will be the sum of sum1/sum2 in each loop iteration, and `largest` will the maximum value of `largest` all the loop iterations +- A reduction implies a copy, so it does not need to be added for both + +**Example** + +```C + #:call GPU_PARALLEL() + {code} + ... + #:endcall GPU_PARALLEL + #:call GPU_PARALLEL(create='[pixel_arr]', copyin='[initial_index]') + {code} + ... + #:endcall +``` + +
+ ------------------------------------------------------------------------------------------ ### Data Control Macros diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 5b0b8bf564..8d0a5a673b 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -199,6 +199,34 @@ $:use_device_val #:enddef +#:def GPU_PARALLEL(code, private=None, default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + #:set default_val = GEN_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel ' + & + & clause_val + extraAccArgs_val.strip('\n') + #:set end_acc_directive = '!$acc end parallel' + $:acc_directive + $:code + $:end_acc_directive +#:enddef + #:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b920151488..568dddb299 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -25,7 +25,7 @@ module m_mpi_common implicit none integer, private :: ierr, v_size !< - !$acc declare create(v_size) + $:GPU_DECLARE(create='[v_size]') !! Generic flags used to identify and report MPI errors real(wp), private, allocatable, dimension(:) :: buff_send !< @@ -38,10 +38,10 @@ module m_mpi_common !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. - !$acc declare create(buff_send, buff_recv) + $:GPU_DECLARE(create='[buff_send, buff_recv]') integer :: halo_size - !$acc declare create(halo_size) + $:GPU_DECLARE(create='[halo_size]') contains @@ -76,7 +76,7 @@ contains halo_size = -1 + buff_size*(v_size) end if - !$acc update device(halo_size, v_size) + $:GPU_UPDATE(device='[halo_size, v_size]') @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) #endif @@ -631,7 +631,7 @@ contains /) end if - !$acc update device(v_size) + $:GPU_UPDATE(device='[v_size]') buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) @@ -667,7 +667,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -680,7 +680,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -695,7 +695,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -711,7 +711,7 @@ contains end do end if #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 @@ -726,7 +726,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -742,7 +742,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -759,7 +759,7 @@ contains end do end if #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -774,7 +774,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -790,7 +790,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -816,28 +816,33 @@ contains #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then #:if rdma_mpi - !$acc host_data use_device(buff_send, buff_recv) - call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:call GPU_HOST_DATA(use_device='[buff_send, buff_recv]') + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + + call MPI_SENDRECV( & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:endcall GPU_HOST_DATA + $:GPU_WAIT() #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) + $:GPU_UPDATE(host='[buff_send]') call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") - #:endif - call MPI_SENDRECV( & - buff_send, buffer_count, mpi_p, dst_proc, send_tag, & - buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call MPI_SENDRECV( & + buff_send, buffer_count, mpi_p, dst_proc, send_tag, & + buff_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - #:if rdma_mpi - !$acc end host_data - !$acc wait - #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) + $:GPU_UPDATE(device='[buff_recv]') call nvtxEndRange #:endif end if @@ -854,7 +859,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -874,7 +879,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -889,7 +894,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -905,7 +910,7 @@ contains end do end if #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = -buff_size, -1 @@ -926,7 +931,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -942,7 +947,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -960,7 +965,7 @@ contains end if #:else ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -982,7 +987,7 @@ contains end do if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -999,7 +1004,7 @@ contains end do end do - !$acc parallel loop collapse(5) gang vector default(present) private(r) + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index e459c91831..4d702106f1 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -316,15 +316,14 @@ contains Rc_min_loc = minval(Rc_sf) end if #else - !$acc kernels + #:call GPU_PARALLEL() icfl_max_loc = maxval(icfl_sf) - !$acc end kernels - + #:endcall GPU_PARALLEL if (viscous) then - !$acc kernels + #:call GPU_PARALLEL() vcfl_max_loc = maxval(vcfl_sf) Rc_min_loc = minval(Rc_sf) - !$acc end kernels + #:endcall GPU_PARALLEL end if #endif diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d168acc752..092af7fbd7 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -748,7 +748,7 @@ contains ! Momentum if (mhd .and. (.not. relativity)) then - !$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 ! Flux of rho*v_i in the ${XYZ}$ direction ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot @@ -763,7 +763,7 @@ contains /(s_M - s_P) end do elseif (mhd .and. relativity) then - !$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 ! Flux of m_i in the ${XYZ}$ direction ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot @@ -850,7 +850,7 @@ contains + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp else if (hypoelasticity) then flux_tau_L = 0._wp; flux_tau_R = 0._wp - !$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) @@ -946,7 +946,7 @@ contains if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. ! B_y flux = v_x * B_y - v_y * Bx0 ! B_z flux = v_x * B_z - v_z * Bx0 - !$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 0, 1 flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & @@ -956,7 +956,7 @@ contains ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - !$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 0, 2 flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 11a16c6bcb..cf8cf80bd7 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -21,7 +21,7 @@ contains !! @param l z coordinate index !! @return fltr_dtheta Modified dtheta value for cylindrical coordinates pure function f_compute_filtered_dtheta(k, l) result(fltr_dtheta) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: k, l real(wp) :: fltr_dtheta integer :: Nfq @@ -48,7 +48,7 @@ contains !! @param l z coordinate index !! @return cfl_terms computed CFL terms for 2D/3D cases pure function f_compute_multidim_cfl_terms(vel, c, j, k, l) result(cfl_terms) - !$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(num_vels), intent(in) :: vel real(wp), intent(in) :: c integer, intent(in) :: j, k, l diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 761ebdd792..8a9d692fff 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -993,9 +993,9 @@ contains end do end do - !$acc kernels + #:call GPU_PARALLEL() dt_local = minval(max_dt) - !$acc end kernels + #:endcall GPU_PARALLEL if (num_procs == 1) then dt = dt_local From 2beb457c343c1e9b113e7df8a73caa737f723f3d Mon Sep 17 00:00:00 2001 From: Tanush Prathi Date: Wed, 2 Jul 2025 14:07:12 -0400 Subject: [PATCH 74/75] Ran formatter --- src/simulation/m_data_output.fpp | 6 +++--- src/simulation/m_time_steppers.fpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 4d702106f1..f233bb5374 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -317,12 +317,12 @@ contains end if #else #:call GPU_PARALLEL() - icfl_max_loc = maxval(icfl_sf) + icfl_max_loc = maxval(icfl_sf) #:endcall GPU_PARALLEL if (viscous) then #:call GPU_PARALLEL() - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) + vcfl_max_loc = maxval(vcfl_sf) + Rc_min_loc = minval(Rc_sf) #:endcall GPU_PARALLEL end if #endif diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 8a9d692fff..381455be2b 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -994,7 +994,7 @@ contains end do #:call GPU_PARALLEL() - dt_local = minval(max_dt) + dt_local = minval(max_dt) #:endcall GPU_PARALLEL if (num_procs == 1) then From df54d6f1b4b21326605cc338f2ac2731af2f16e6 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 6 Jul 2025 09:52:34 -0400 Subject: [PATCH 75/75] fix bug --- src/simulation/m_rhs.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 744d54c31b..5c16a5ba9f 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -173,8 +173,8 @@ contains integer :: num_eqns_after_adv - $:GPU_ENTER_DATA(copyin='[idwbuff,idwbuff]') - $:GPU_UPDATE(device='[idwbuff, idwbuff]') + $:GPU_ENTER_DATA(copyin='[idwbuff]') + $:GPU_UPDATE(device='[idwbuff]') @:ALLOCATE(q_cons_qp%vf(1:sys_size)) @:ALLOCATE(q_prim_qp%vf(1:sys_size))